{-|
Module      : Gargantext.Text.Terms.Mono
Description : Mono Terms module
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental

Mono-terms are Nterms where n == 1.
-}
14 {-# LANGUAGE NoImplicitPrelude #-}
16 module Gargantext.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence)
19 import Prelude (String)
20 import Data.Char (isSpace)
21 import Data.Text (Text, toLower, split, splitOn, pack)
23 import Data.Text (Text)
24 import qualified Data.Text as T
26 import qualified Data.List as L
27 import qualified Data.Set as S
29 import Gargantext.Core
30 import Gargantext.Core.Types
31 import Gargantext.Text.Terms.Mono.Stem (stem)
33 import Gargantext.Prelude
34 --import Data.Char (isAlphaNum, isSpace)
-- TODO remove Num ?
37 --isGram c = isAlphaNum c
-- | Sentence split separators.
--
-- Returns 'True' when the character is one of the characters listed in the
-- literal below (comma, full stop, colon, semicolon, question mark,
-- exclamation mark, parentheses, braces, square brackets and the double
-- quote), and 'False' for every other character.
isSep :: Char -> Bool
isSep = (`elem` (",.:;?!(){}[]\"" :: String))
-- | Extract the mono-terms of a text for a given language.
--
-- The text is tokenised with 'monoTexts' and every token is converted with
-- 'monoText2term', in the order in which the tokens are produced.
monoTerms :: Lang -> Text -> [Terms]
monoTerms lang = map (monoText2term lang) . monoTexts
-- | Flat list of the tokens of a text: the per-sentence token lists returned
-- by 'monoTextsBySentence', concatenated in order.
monoTexts :: Text -> [Text]
monoTexts txt = L.concat (monoTextsBySentence txt)
-- | Build the 'Terms' value for a single token.
--
-- The first field is the one-element list holding the token itself; the
-- second is a singleton set holding the result of 'stem' for the given
-- language and token.
monoText2term :: Lang -> Text -> Terms
monoText2term lang token = Terms [token] stems
  where
    stems = S.singleton (stem lang token)
-- | Tokenise a text sentence by sentence.
--
-- The input is first cut at every character accepted by 'isSep'; each
-- resulting piece is then cut at every whitespace character ('isSpace').
-- The outer list has one entry per piece, the inner lists hold its tokens.
--
-- Both steps use 'T.split', which keeps empty fields: adjacent or trailing
-- delimiters therefore yield empty 'Text' tokens in the result.
--
-- NOTE(review): the case of the input is left untouched; confirm whether
-- callers expect lower-cased tokens.
monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence = map (T.split isSpace)
                    . T.split isSep