]> Git — Sourcephile - gargantext.git/blob - src/Data/Gargantext/Ngrams/Parser.hs
[FEAT] Ngrams metrics, thanks to text-metrics to begin with.
[gargantext.git] / src / Data / Gargantext / Ngrams / Parser.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE ScopedTypeVariables #-}
3
4 module Data.Gargantext.Ngrams.Parser where
5
6 import Data.Gargantext.Prelude
7 import Data.Gargantext.Ngrams.CoreNLP
8
9
10 import Data.Gargantext.Types.Main (Language(..), Ngrams)
11 import qualified Data.Gargantext.Ngrams.Lang.En as En
12 import qualified Data.Gargantext.Ngrams.Lang.Fr as Fr
13
14
-- TODO for scientific papers: add measures
-- TODO add the p score regex

-- | Extract the ngrams of a text and regroup them with the grouping rules
-- of the given language.
--
-- The raw extraction is delegated to 'extractNgrams''; the language is only
-- used afterwards, to pick the 'groupNgrams' variant applied to each inner
-- list of the result.
extractNgrams :: Language -> String -> IO [[Ngrams]]
extractNgrams lang s = regroup <$> extractNgrams' s
  where
    -- Apply the language-specific grouping to every inner list.
    regroup perSentence = pm (groupNgrams lang) perSentence
19
20
-- | Run 'corenlp' on the text and turn its result into ngrams.
--
-- The 'sentences' of the result are taken, each one is replaced by its
-- '_sentenceTokens', and every token is converted with 'token2text', so the
-- outer list has one entry per sentence.
extractNgrams' :: String -> IO [[Ngrams]]
extractNgrams' t = tokensToNgrams <$> corenlp t
  where
    -- Token lists, one per sentence of the parsed result.
    sentenceTokens parsed = pm _sentenceTokens (sentences parsed)
    -- Convert every token of every sentence.
    tokensToNgrams parsed = pm (pm token2text) (sentenceTokens parsed)
26
-- | Select ngrams according to the grammar specific to each language.
--
-- The work is delegated to the @selectNgrams@ of the matching language
-- module. Tag names differ between languages: e.g. what is tagged @JJ@ in
-- English is presumably tagged @ADJ@ in French (to be confirmed against the
-- language modules).
selectNgrams :: Language -> [Ngrams] -> [Ngrams]
selectNgrams lang = case lang of
  EN -> En.selectNgrams
  FR -> Fr.selectNgrams
33
-- | Analyze and group (or leave ungrouped) ngrams according to the grammar
-- specific to each language.
--
-- The work is delegated to the @groupNgrams@ of the matching language
-- module.
groupNgrams :: Language -> [Ngrams] -> [Ngrams]
groupNgrams lang = case lang of
  EN -> En.groupNgrams
  FR -> Fr.groupNgrams
39
40