2 Module : Gargantext.Text.Terms.Multi
3 Description : Multi Terms module
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Multi-terms are ngrams where n > 1.
14 {-# LANGUAGE NoImplicitPrelude #-}
16 module Gargantext.Text.Terms.Multi (multiterms)
19 import Data.Text hiding (map, group, filter, concat)
20 import Data.List (concat)
22 import Gargantext.Prelude
23 import Gargantext.Core (Lang(..))
24 import Gargantext.Core.Types
26 import Gargantext.Text.Terms.Multi.PosTagging
27 import qualified Gargantext.Text.Terms.Multi.Lang.En as En
28 import qualified Gargantext.Text.Terms.Multi.Lang.Fr as Fr
-- | Extract the multi-terms of a text for a given language.
--
-- The text is first tagged with 'tokenTags', which yields a list of
-- lists of 'TokenTag'. Only the tags whose '_my_token_pos' equals
-- @Just NP@ are kept; each survivor is converted with 'tokenTag2terms'
-- and the per-list results are flattened into a single list, preserving
-- the original order.
multiterms :: Lang -> Text -> IO [Terms]
multiterms lang txt = selectTerms <$> tokenTags lang txt
  where
    -- Predicate selecting the tags that are marked as NP.
    isNP tag = _my_token_pos tag == Just NP

    -- Pure part of the pipeline: filter, convert, then flatten.
    selectTerms tagLists =
      concat
        [ [ tokenTag2terms tag | tag <- tags, isNP tag ]
        | tags <- tagLists
        ]
-- | Convert a 'TokenTag' into 'Terms'.
--
-- The first two fields of the 'TokenTag' constructor are passed, in the
-- same order, to the 'Terms' constructor; the last two fields are
-- discarded.
tokenTag2terms :: TokenTag -> Terms
tokenTag2terms tag =
  case tag of
    TokenTag firstField secondField _ _ -> Terms firstField secondField
-- | Tag a text and apply the language-specific grouping.
--
-- Runs @tokenTags'@ on the input and then applies 'group' (for the given
-- language) independently to each inner list of 'TokenTag' it returns.
tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags lang s = groupEach <$> tokenTags' lang s
  where
    -- Apply the grouping to every inner list, keeping the outer structure.
    groupEach tagLists = [ group lang tags | tags <- tagLists ]
43 tokenTags' :: Lang -> Text -> IO [[TokenTag]]
44 tokenTags' lang t = map tokens2tokensTags
45 <$> map _sentenceTokens
49 ---- | This function analyses and groups (or not) ngrams according to
50 ---- specific grammars of each language.
51 group :: Lang -> [TokenTag] -> [TokenTag]