1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE ScopedTypeVariables #-}
4 module Data.Gargantext.Ngrams.Parser where
6 import Data.Gargantext.Prelude
7 import Data.Gargantext.NLP.CoreNLP
10 import Data.Gargantext.Types.Main (Language(..), Ngrams)
11 import qualified Data.Gargantext.Ngrams.Lang.En as En
12 import qualified Data.Gargantext.Ngrams.Lang.Fr as Fr
-- TODO for scientific papers: add measures
16 -- TODO add the p score regex
-- | Run 'extractNgrams'' on the given text and then apply the English
-- grouping rules ('groupNgrams' 'EN') to every inner list of the result.
extractNgrams :: String -> IO [[Ngrams]]
extractNgrams t = pm groupEnglish <$> extractNgrams' t
  where
    -- The language is hard-wired to English at this call site.
    groupEnglish = groupNgrams EN
21 extractNgrams' :: String -> IO [[Ngrams]]
22 extractNgrams' t = pm (pm token2text)
23 <$> pm _sentenceTokens
-- | Select ngrams using the grammar rules of the given language.
--
-- The work is delegated to the language-specific module:
-- "Data.Gargantext.Ngrams.Lang.En" for 'EN' and
-- "Data.Gargantext.Ngrams.Lang.Fr" for 'FR'.
-- Tag names differ between grammars: per the original note, what is
-- tagged JJ in English is tagged ADJ in French.
selectNgrams :: Language -> [Ngrams] -> [Ngrams]
selectNgrams lang = case lang of
  EN -> En.selectNgrams
  FR -> Fr.selectNgrams
-- | Analyse and group (or leave ungrouped) ngrams using the grammar
-- rules of the given language.
--
-- The work is delegated to the language-specific module:
-- "Data.Gargantext.Ngrams.Lang.En" for 'EN' and
-- "Data.Gargantext.Ngrams.Lang.Fr" for 'FR'.
groupNgrams :: Language -> [Ngrams] -> [Ngrams]
groupNgrams lang = case lang of
  EN -> En.groupNgrams
  FR -> Fr.groupNgrams