]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Terms/Multi.hs
[RENAME/ORG] ngrams -> terms.
[gargantext.git] / src / Gargantext / Text / Terms / Multi.hs
1 {-|
2 Module : Gargantext.Text.Terms.Multi
3 Description : Multi Terms module
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Multi-terms are ngrams where n > 1.
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15
16 module Gargantext.Text.Terms.Multi (extractTokenTags)
17 where
18
19 import Data.Text hiding (map, group)
20
21 import Gargantext.Prelude
22 import Gargantext.Core (Lang(..))
23 import Gargantext.Core.Types
24
25 import Gargantext.Text.Terms.Multi.PosTagging
26 import qualified Gargantext.Text.Terms.Multi.Lang.En as En
27 import qualified Gargantext.Text.Terms.Multi.Lang.Fr as Fr
28
29
-- | Tag the tokens of a text, then regroup the tags of every sentence with
-- the grammar of the given language (see 'group').
--
-- The result holds one inner list per sentence produced by
-- @extractTokenTags'@.
extractTokenTags :: Lang -> Text -> IO [[TokenTag]]
extractTokenTags lang s = regroupAll <$> extractTokenTags' lang s
  where
    -- Apply the language grammar to each sentence independently.
    regroupAll sentences = map regroupSentence sentences
    regroupSentence      = group lang
32
33
-- | Tag the tokens of a text without any language-specific regrouping.
--
-- The text is handed to 'corenlp'; for every element of the @_sentences@
-- field of its result, the @_sentenceTokens@ are converted with
-- 'tokens2tokensTags'.
extractTokenTags' :: Lang -> Text -> IO [[TokenTag]]
extractTokenTags' lang t = tagSentences <$> corenlp lang t
  where
    -- One list of tags per sentence found in the 'corenlp' result.
    tagSentences parsed  = map tagSentence (_sentences parsed)
    tagSentence sentence = tokens2tokensTags (_sentenceTokens sentence)
39
-- | This function analyses and groups (or not) ngrams according to
-- specific grammars of each language.
--
-- It only selects the grouping function of the matching language module.
group :: Lang -> [TokenTag] -> [TokenTag]
group lang = case lang of
  EN -> En.group
  FR -> Fr.group
45