]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
Merge remote-tracking branch 'origin/445-dev-doc-upload-lang' into dev
[gargantext.git] / src / Gargantext / Core / Text / Terms / Multi / Lang / En.hs
1 {-|
2 Module : Gargantext.Core.Text.Terms.Multi.Lang.En
3 Description : English Grammar rules to group postag tokens.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This English module defines the rule-based grammar used to group
11 part-of-speech tagged tokens into extracted terms.
12
13 -}
14
15
16 module Gargantext.Core.Text.Terms.Multi.Lang.En (groupTokens)
17 where
18
19 import Gargantext.Prelude
20 import Gargantext.Core.Types
21 import Gargantext.Core.Text.Terms.Multi.Group
22
23 ------------------------------------------------------------------------
-- | English grouping grammar: rewrites a list of tagged tokens by running a
-- fixed sequence of 'group2' passes over it.
--
-- The passes run in this order, each one consuming the previous result:
-- @JJ CC@, @JJ JJ@, @JJ NP@, @IN DT@ and finally @NP NP@.
-- An empty input is returned as-is, without running any pass.
--
-- Disabled rules, kept for reference: @group2 VB NP@ (between the @JJ NP@
-- and @IN DT@ passes), then @group2 NP IN@ and @group2 NP VB@ (in that
-- order, between the @IN DT@ and @NP NP@ passes).
groupTokens :: [TokenTag] -> [TokenTag]
groupTokens []   = []
groupTokens tags = passNPNP
  where
    passJJCC = group2 JJ CC tags
    passJJJJ = group2 JJ JJ passJJCC
    passJJNP = group2 JJ NP passJJJJ
    passINDT = group2 IN DT passJJNP
    passNPNP = group2 NP NP passINDT
36
37 ------------------------------------------------------------------------
38 --groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
39 --groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
40 --groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs) = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
41 --
42 --groupNgrams (x:xs) = (x:(groupNgrams xs))