2 Module : Gargantext.Core.Text.Terms.Mono
3 Description : Mono Terms module
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Mono-terms are Nterms where n == 1.
15 module Gargantext.Core.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence, words)
18 import Prelude (String)
20 import Data.Text (Text)
21 import qualified Data.Text as T
23 import qualified Data.List as L
24 import qualified Data.Set as S
26 import Gargantext.Core
27 import Gargantext.Core.Types
28 import Gargantext.Core.Text.Terms.Mono.Stem (stem)
30 import Gargantext.Prelude
31 --import Data.Char (isAlphaNum, isSpace)
-- TODO: remove Num?
-- isGram c = isAlphaNum c
36 words :: Text -> [Text]
-- | Predicate recognising the punctuation characters used as sentence
-- split separators: comma, full stop, colon, semicolon, question and
-- exclamation marks, round/curly/square brackets, and both quote characters.
isSep c = c `elem` separators
  where
    -- The exact set of separator characters.
    separators = ",.:;?!(){}[]\"\'" :: String
-- | Turn a text into its mono-terms, each paired with a count of 1.
--
-- Every token produced by 'monoTexts' is converted with 'monoText2term'
-- for the given language; repeated tokens yield repeated entries (nothing
-- is merged or summed here).
monoTerms :: Lang -> Text -> [TermsWithCount]
monoTerms l txt = [ (monoText2term l token, 1) | token <- monoTexts txt ]
-- | Flat list of all tokens of a text: the per-sentence token lists from
-- 'monoTextsBySentence', concatenated in order.
monoTexts :: Text -> [Text]
monoTexts txt =
  [ token
  | sentence <- monoTextsBySentence txt
  , token    <- sentence
  ]
-- | Build a 'Terms' value from a single token: the token itself as a
-- one-element list, together with a singleton set holding its stem for
-- the given language.
--
-- TODO use text2term only
monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] stems
  where
    stems = S.singleton (stem lang txt)
53 monoTextsBySentence :: Text -> [[Text]]
54 monoTextsBySentence = map T.words