]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Terms/Mono.hs
[FIX] Corenlp lems -> using stem.
[gargantext.git] / src / Gargantext / Text / Terms / Mono.hs
1 {-|
2 Module : Gargantext.Text.Terms.Mono
3 Description : Mono Terms module
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mono-terms are Nterms where n == 1.
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15
16 module Gargantext.Text.Terms.Mono (monoterms, monoterms')
17 where
18
19 import Data.Text (Text, toLower, split, splitOn, pack)
20 import qualified Data.Set as S
21
22 import Gargantext.Core
23 import Gargantext.Core.Types
24 import Gargantext.Text.Terms.Mono.Stem (stem)
25
26 import Gargantext.Prelude
27 import Data.Char (isAlphaNum, isSpace)
28
-- | Tokenise a text with 'monoterms' and convert every resulting token
-- into a 'Terms' value for the given language (see 'text2terms').
monoterms' :: Lang -> Text -> [Terms]
monoterms' l txt = [ text2terms l token | token <- monoterms txt ]
31
-- | Split a text into lower-cased mono-terms (single tokens).
--
-- The input is cut at every space, apostrophe, comma and semicolon.
-- 'split' yields an empty piece between two adjacent separators (and for a
-- leading or trailing separator), so those empty pieces are dropped:
-- an empty string is not a term.
monoterms :: Text -> [Text]
monoterms txt = [ toLower token | token <- split isSep txt, token /= pack "" ]
  where
    -- Characters that delimit tokens; they never appear in the output.
    isSep c = c `elem` [' ', '\'', ',', ';']
36
-- | Build a 'Terms' value from a piece of text.
--
-- The first field is the text split on single spaces; the second is the
-- set obtained by applying @'stem' lang@ to each of those words.
text2terms :: Lang -> Text -> Terms
text2terms lang txt =
  let ws      = splitOn (pack " ") txt
      stemmed = S.fromList (map (stem lang) ws)
  in  Terms ws stemmed
42
43 --monograms :: Text -> [Text]
44 --monograms xs = monograms $ toLower $ filter isGram xs
45
-- | Character filter: accepts alphanumeric characters, whitespace, and the
-- three punctuation marks hyphen, slash and apostrophe.
isGram :: Char -> Bool
isGram c = isAlphaNum c || isSpace c || isJoiner
  where
    -- Punctuation that is accepted in addition to letters, digits and spaces.
    isJoiner = c `elem` ['-', '/', '\'']
48