2 Module : Gargantext.Text.Terms.WithList
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE BangPatterns #-}
17 module Gargantext.Text.Terms.WithList where
19 import qualified Data.Algorithms.KMP as KMP
20 import Data.Text (Text)
21 import qualified Data.IntMap.Strict as IntMap
23 import Gargantext.Text.Context
24 import Gargantext.Text.Terms.Mono (monoTextsBySentence)
26 import Gargantext.Prelude
27 import Data.List (concatMap)
30 ------------------------------------------------------------------------
32 type Pattern = KMP.Table Term
33 type Patterns = [(Pattern, Int, Label)]
35 ------------------------------------------------------------------------
37 replaceTerms :: Patterns -> Sentence Term -> Sentence Label
38 replaceTerms pats terms = go 0 terms
42 case IntMap.lookup ix m of
43 Nothing -> t : go (ix + 1) ts
45 label : go (ix + len) (drop (len - 1) ts)
47 merge (len1, lab1) (len2, lab2) =
48 if len2 < len1 then (len1, lab1) else (len2, lab2)
51 IntMap.fromListWith merge
53 | (pat, len, label) <- pats, ix <- KMP.match pat terms ]
55 buildPatterns :: TermList -> Patterns
56 buildPatterns = concatMap buildPattern
58 buildPattern (label, alts) = map f alts
60 f alt = (KMP.build alt, length alt, label)
62 extractTermsWithList :: Patterns -> Text -> Corpus Label
63 extractTermsWithList pats = map (replaceTerms pats) . monoTextsBySentence