]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Terms/Multi/Lang/Fr.hs
[FIX] bug in FlowCont Semigroup instance (intersection for cont)
[gargantext.git] / src / Gargantext / Core / Text / Terms / Multi / Lang / Fr.hs
1 {-|
2 Module : Gargantext.Core.Text.Terms.Multi.Lang.Fr
3 Description : French Grammar rules to group postag tokens.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This @group@ function groups ngrams horizontally, within the context of
11 their sentence, according to the grammar specific to each language. The
12 English tag JJ corresponds to ADJ (adjective) in French.
13
14 -}
15
16
17 module Gargantext.Core.Text.Terms.Multi.Lang.Fr (groupTokens)
18 where
19
20 import Gargantext.Prelude
21 import Gargantext.Core.Types
22 import Gargantext.Core.Text.Terms.Multi.Group (group2)
23
-- | Run the French grouping rules over a list of POS-tagged tokens.
--
-- Each rule is a 'group2' pass parameterised by a pair of tags; the passes
-- are chained so that the output of one pass feeds the next. An empty token
-- list is returned as-is without running any pass.
--
-- NOTE(review): 'group2' presumably merges neighbouring tokens carrying the
-- two given tags -- confirm in "Gargantext.Core.Text.Terms.Multi.Group".
groupTokens :: [TokenTag] -> [TokenTag]
groupTokens [] = []
groupTokens ntags = frenchRules ntags
  where
    -- Composition applies the bottom-most pass first: @JJ JJ@ sees the raw
    -- tokens and @NP NP@ runs last.
    frenchRules =
        group2 NP NP
      . group2 NP VB
      -- group2 NP IN   (disabled)
      -- group2 IN DT   (disabled)
      . group2 VB NP
      . group2 JJ NP
      . group2 NP JJ
      . group2 JJ JJ
      -- group2 JJ CC   (disabled)
36
37 ------------------------------------------------------------------------
38 -- TODO
39 --groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
40 --groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)