{-|
Module      : Gargantext.Text.Terms.Multi
Description : Multi Terms module
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Multi-terms are ngrams where n > 1.

-}

{-# LANGUAGE NoImplicitPrelude #-}

module Gargantext.Text.Terms.Multi (multiterms)
  where

import Data.Text hiding (map, group, filter, concat)
import Data.List (concat)

import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types

import Gargantext.Text.Terms.Multi.PosTagging
import qualified Gargantext.Text.Terms.Multi.Lang.En as En
import qualified Gargantext.Text.Terms.Multi.Lang.Fr as Fr

-- | Extract the multi-terms of a text.
--
-- The text is tagged with 'tokenTags'; inside every list of tags it returns,
-- only the tags whose '_my_token_pos' equals @Just NP@ are kept. The kept
-- tags are converted with 'tokenTag2terms' and all per-list results are
-- flattened into one list, preserving their order.
--
-- NOTE(review): @NP@ presumably stands for "noun phrase" -- confirm against
-- the definition of the tag type.
multiterms :: Lang -> Text -> IO [Terms]
multiterms lang txt = collectTerms <$> tokenTags lang txt
  where
    -- True for the tags this module treats as multi-terms.
    hasNPTag tag = _my_token_pos tag == Just NP

    -- Filter and convert each inner list, then flatten.
    collectTerms sentences =
      concat [ map tokenTag2terms (filter hasNPTag sentence)
             | sentence <- sentences
             ]

-- | Build a 'Terms' value from the first two fields of a 'TokenTag'.
-- The last two fields of the 'TokenTag' are ignored.
tokenTag2terms :: TokenTag -> Terms
tokenTag2terms (TokenTag fstField sndField _ _) = Terms fstField sndField

-- | Tag a text with "tokenTags'" and then apply the language-specific
-- 'group' function to every list of tags it produced.
tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags lang s = groupEach <$> tokenTags' lang s
  where
    groupEach sentences = [ group lang sentence | sentence <- sentences ]

-- | Run 'corenlp' on the text and, for each element of the '_sentences' of
-- its result, convert that element's '_sentenceTokens' with
-- 'tokens2tokensTags'. The outer list therefore has one entry per sentence
-- reported by 'corenlp'.
tokenTags' :: Lang -> Text -> IO [[TokenTag]]
tokenTags' lang t = tagEachSentence <$> corenlp lang t
  where
    tagEachSentence parsed =
      [ tokens2tokensTags (_sentenceTokens sentence)
      | sentence <- _sentences parsed
      ]

-- | This function analyses and groups (or not) ngrams according to
-- specific grammars of each language.
--
-- Dispatches to the grouping function of the matching language module.
-- NOTE(review): only 'EN' and 'FR' are handled here; if 'Lang' has other
-- constructors, calling this with one of them is a pattern-match failure.
group :: Lang -> [TokenTag] -> [TokenTag]
group lang = case lang of
  EN -> En.group
  FR -> Fr.group