src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs

   1 {-|
   2 Module      : Gargantext.Core.Text.Terms.Multi.Lang.En
   3 Description : English Grammar rules to group postag tokens.
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 Rule-based grammars are applied in this English module in order to group
  11 the tokens into extracted terms.
  12
  13 -}
  14
  15
  16 module Gargantext.Core.Text.Terms.Multi.Lang.En (groupTokens)
  17   where
  18
  19 import Gargantext.Prelude
  20 import Gargantext.Core.Types
  21 import Gargantext.Core.Text.Terms.Multi.Group
  22
  23 ------------------------------------------------------------------------
  24 -- | Rule grammar to group tokens
  25 groupTokens :: [TokenTag] -> [TokenTag]
  26 groupTokens []    = []
  27 groupTokens ntags = group2 NP NP
  28                   $ group2 NP VB
  29         --          $ group2 NP IN
  30                   $ group2 IN DT
  31         --          $ group2 VB NP
  32                   $ group2 JJ NP
  33                   $ group2 JJ JJ
  34                   $ group2 JJ CC
  35                   $ ntags
  36
  37 ------------------------------------------------------------------------
  38 --groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs)             = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
  39 --groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
  40 --groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs)         = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
  41 --
  42 --groupNgrams (x:xs)                                          = (x:(groupNgrams xs))
  43