2 Module : Gargantext.Text.Terms.Multi
3 Description : Multi Terms module
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Multi-terms are ngrams where n > 1.
14 {-# LANGUAGE NoImplicitPrelude #-}
16 module Gargantext.Text.Terms.Multi (extractTokenTags)
19 import Data.Text hiding (map, group)
21 import Gargantext.Prelude
22 import Gargantext.Core (Lang(..))
23 import Gargantext.Core.Types
25 import Gargantext.Text.Terms.Multi.PosTagging
26 import qualified Gargantext.Text.Terms.Multi.Lang.En as En
27 import qualified Gargantext.Text.Terms.Multi.Lang.Fr as Fr
-- | Run 'extractTokenTags'' on the given text for the given language, then
-- apply 'group' (for that same language) to every inner list of the result.
extractTokenTags :: Lang -> Text -> IO [[TokenTag]]
extractTokenTags lang s = regroupAll <$> extractTokenTags' lang s
  where
    -- Each inner list of tags is regrouped independently of the others.
    regroupAll tagLists = map (group lang) tagLists
34 extractTokenTags' :: Lang -> Text -> IO [[TokenTag]]
35 extractTokenTags' lang t = map tokens2tokensTags
36 <$> map _sentenceTokens
40 ---- | This function analyses and groups (or not) ngrams according to
41 ---- specific grammars of each language.
42 group :: Lang -> [TokenTag] -> [TokenTag]