]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Terms/Mono.hs
[WithList] adding labelPolicy.
[gargantext.git] / src / Gargantext / Text / Terms / Mono.hs
1 {-|
2 Module : Gargantext.Text.Terms.Mono
3 Description : Mono Terms module
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mono-terms are Nterms where n == 1.
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15
16 module Gargantext.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence)
17 where
18
19 import Prelude (String)
20 import Data.Char (isSpace)
21 import Data.Text (Text, toLower, split, splitOn, pack)
22
23 import Data.Text (Text)
24 import qualified Data.Text as T
25
26 import qualified Data.List as L
27 import qualified Data.Set as S
28
29 import Gargantext.Core
30 import Gargantext.Core.Types
31 import Gargantext.Text.Terms.Mono.Stem (stem)
32
33 import Gargantext.Prelude
34 --import Data.Char (isAlphaNum, isSpace)
35
36 -- | TODO remove Num ?
37 --isGram c = isAlphaNum c
38
39
-- | Predicate recognising the punctuation characters used as sentence
-- boundaries: comma, full stop, colon, semicolon, question and exclamation
-- marks, round\/curly\/square brackets and the double quote.
isSep :: Char -> Bool
isSep c = c `elem` separators
  where
    separators :: String
    separators = ",.:;?!(){}[]\""
43
44
-- | Tokenise a text with 'monoTexts' and turn every resulting token into a
-- 'Terms' value via 'monoText2term', using the given language.
monoTerms :: Lang -> Text -> [Terms]
monoTerms lang = map (monoText2term lang) . monoTexts
47
-- | Flat list of the tokens of a text: the per-sentence token lists
-- produced by 'monoTextsBySentence', concatenated in order.
monoTexts :: Text -> [Text]
monoTexts txt = L.concat (monoTextsBySentence txt)
50
-- | Wrap a single token into a 'Terms' value: the token itself as a
-- one-element list, together with the singleton set holding its stem
-- (computed by 'stem' for the given language).
monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms label stems
  where
    label = [txt]
    stems = S.singleton (stem lang txt)
53
54
-- | Split a text into sentences and each sentence into its words.
--
-- The input is lower-cased first, then cut at every character accepted by
-- 'isSep'; each resulting piece is broken into whitespace-delimited words.
--
-- Words are extracted with 'T.words' rather than @T.split isSpace@: the
-- latter emits an empty 'Text' for every leading, trailing or repeated
-- whitespace character (e.g. the space following a comma), and those empty
-- tokens would otherwise be turned into bogus terms downstream.
--
-- A piece containing no word (for instance the one after a trailing full
-- stop) is kept as an empty list, so the outer list still has one entry per
-- piece produced by the separator split.
monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence = map T.words
                    . T.split isSep
                    . T.toLower
59
60
61
62