2 Module : Gargantext.Text.Terms.WithList
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE BangPatterns #-}
18 module Gargantext.Text.Terms.WithList where
20 import Data.List (null, concatMap)
22 import Data.Text (Text, concat)
23 import Gargantext.Prelude
24 import Gargantext.Text.Context
25 import Gargantext.Text.Terms.Mono (monoTextsBySentence)
26 import Prelude (error)
27 import qualified Data.Algorithms.KMP as KMP
28 import qualified Data.IntMap.Strict as IntMap
30 ------------------------------------------------------------------------
32 data Pattern = Pattern
33 { _pat_table :: !(KMP.Table Text)
35 , _pat_terms :: ![Text]
37 type Patterns = [Pattern]
39 ------------------------------------------------------------------------
40 replaceTerms :: Patterns -> [Text] -> [[Text]]
41 replaceTerms pats terms = go 0
43 terms_len = length terms
45 go ix | ix >= terms_len = []
47 case IntMap.lookup ix m of
48 Nothing -> go (ix + 1)
53 merge (len1, lab1) (len2, lab2) =
54 if len2 < len1 then (len1, lab1) else (len2, lab2)
57 IntMap.fromListWith merge
59 | Pattern pat len term <- pats, ix <- KMP.match pat terms ]
61 buildPatterns :: TermList -> Patterns
62 buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
64 buildPattern (label, alts) = map f (label : alts)
66 f alt | "" `elem` alt = error "buildPatterns: ERR1"
67 | null alt = error "buildPatterns: ERR2"
69 Pattern (KMP.build alt) (length alt) label
70 --(Terms label $ Set.empty) -- TODO check stems
72 extractTermsWithList :: Patterns -> Text -> Corpus [Text]
73 extractTermsWithList pats = map (replaceTerms pats) . monoTextsBySentence
76 -- >>> let termList = [(["chat blanc"], [["chat","blanc"]])] :: TermList
77 -- extractTermsWithList' (buildPatterns termList) "Le chat blanc"["chat blanc"]
79 extractTermsWithList' :: Patterns -> Text -> [Text]
80 extractTermsWithList' pats = map (concat . map concat . replaceTerms pats)
84 filterWith :: TermList
88 filterWith termList f xs = filterWith' termList f zip xs
91 filterWith' :: TermList
93 -> ([a] -> [[Text]] -> [b])
96 filterWith' termList f f' xs = f' xs
97 $ map (extractTermsWithList' pats)
100 pats = buildPatterns termList