2 Module : Gargantext.Text.Terms.WithList
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE BangPatterns #-}
18 module Gargantext.Text.Terms.WithList where
20 import qualified Data.Algorithms.KMP as KMP
21 import Data.Text (Text)
22 import qualified Data.IntMap.Strict as IntMap
24 import Gargantext.Core.Types (Terms(..))
25 import Gargantext.Text.Context
26 import Gargantext.Text.Terms.Mono (monoTextsBySentence)
28 import Prelude (error)
29 import Gargantext.Prelude
30 import Data.List (null, concatMap)
32 import qualified Data.Set as Set
35 ------------------------------------------------------------------------
37 data Pattern = Pattern
38 { _pat_table :: !(KMP.Table Term)
40 , _pat_terms :: !Terms
42 type Patterns = [Pattern]
44 ------------------------------------------------------------------------
46 replaceTerms :: Patterns -> Sentence Term -> Sentence Terms
47 replaceTerms pats terms = go 0
49 terms_len = length terms
51 go ix | ix >= terms_len = []
53 case IntMap.lookup ix m of
54 Nothing -> go (ix + 1)
59 merge (len1, lab1) (len2, lab2) =
60 if len2 < len1 then (len1, lab1) else (len2, lab2)
63 IntMap.fromListWith merge
65 | Pattern pat len term <- pats, ix <- KMP.match pat terms ]
67 buildPatterns :: TermList -> Patterns
68 buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
70 buildPattern (label, alts) = map f (label : alts)
72 f alt | "" `elem` alt = error "buildPatterns: ERR1"
73 | null alt = error "buildPatterns: ERR2"
75 Pattern (KMP.build alt) (length alt)
76 (Terms label $ Set.empty) -- TODO check stems
78 extractTermsWithList :: Patterns -> Text -> Corpus Terms
79 extractTermsWithList pats = map (replaceTerms pats) . monoTextsBySentence