2 Module : Gargantext.Ngrams.Lang.Fr
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
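Selection and grouping rules for French n-grams: 'selectNgrams' keeps the
tagged tokens that qualify as n-gram candidates, and 'groupNgrams' merges
adjacent tagged tokens (for example noun + adjective) into multi-word n-grams.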
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Ngrams.Lang.Fr (selectNgrams, groupNgrams, textTest)
20 import Gargantext.Prelude
21 import Data.Text (Text)
22 import Data.Monoid ((<>))
-- | Keep only the triples whose tags mark them as n-gram candidates.
--
-- A triple is retained when its second component is @"N"@, @"NC"@ or
-- @"NN+CC"@, or when its third component is @"PERSON"@,
-- @"ORGANIZATION"@ or @"LOCATION"@; every other triple is dropped.
-- The relative order of the retained triples is preserved.
--
-- NOTE(review): the second and third components look like a
-- part-of-speech tag and a named-entity label respectively -- confirm
-- against whatever produces these triples.
selectNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
selectNgrams = filter isCandidate
  where
    -- The second component is inspected first; the third is only
    -- consulted when the second is not one of the accepted tags.
    isCandidate (_, tag, entity) =
      case tag of
        "N"     -> True
        "NC"    -> True
        "NN+CC" -> True
        _       -> case entity of
          "PERSON"       -> True
          "ORGANIZATION" -> True
          "LOCATION"     -> True
          _              -> False
-- | Merge adjacent tagged triples into multi-word n-grams.
--
-- The list is scanned from the left. The first equation (in source
-- order) whose pattern matches the front of the list fires; the merged
-- triple is pushed back onto the front and scanning resumes from it, so
-- a merged triple can take part in further merges. When no rule
-- matches, the head triple is emitted unchanged and scanning continues
-- with the tail.
--
-- Merged first components are joined with a single space. Which second
-- and third components the merged triple inherits differs per rule and
-- is visible in each equation.
groupNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
-- End of input. Every recursion bottoms out here, so this equation is
-- required for the function to be total.
groupNgrams [] = []

-- Disabled rule: drop determiners.
--groupNgrams ((_,"DET",_):xs) = groupNgrams xs

-- Group: common noun followed by two adjectives joined by a conjunction.
-- The noun is distributed over both adjectives, producing two triples
-- that both keep the noun's third component.
groupNgrams ((n,"NC",n'):(j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",_):xs) = groupNgrams (n1:n2:xs)
  where
    n1 = (n <> " " <> j1, "NC", n')
    n2 = (n <> " " <> j2, "NC", n')

-- /!\ The tag is sometimes "N" instead of "NC" (reason unknown); same
-- rule as above, and the resulting triples keep the "N" tag.
groupNgrams ((n,"N",n'):(j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",_):xs) = groupNgrams (n1:n2:xs)
  where
    n1 = (n <> " " <> j1, "N", n')
    n2 = (n <> " " <> j2, "N", n')

-- Disabled rule: adjective + coordinating conjunction + adjective.
-- groupNgrams ((j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",j2'):xs) = groupNgrams ((j1 <> " " <> j2, "ADJ", j2'):xs)

-- Group: common noun + preposition + noun ("NC" or "NPP"), optionally
-- with a determiner before an "NPP". The result is tagged "NC" and
-- inherits the third component of the last noun.
groupNgrams ((n1,"NC",_):(p,"P",_):(n2,"NC",n2'):xs) = groupNgrams ((n1 <> " " <> p <> " " <> n2, "NC", n2'):xs)
groupNgrams ((n1,"NC",_):(p,"P",_):(n2,"NPP",n2'):xs) = groupNgrams ((n1 <> " " <> p <> " " <> n2, "NC", n2'):xs)
groupNgrams ((n1,"NC",_):(prep,"P",_):(det,"DET",_):(n2,"NPP",n2'):xs) = groupNgrams ((n1 <> " " <> prep <> " " <> det <> " " <> n2, "NC", n2'):xs)

-- Group: several successive adjectives.
groupNgrams ((x,"ADJ",_):(y,"ADJ",yy):xs) = groupNgrams ((x <> " " <> y, "ADJ", yy):xs)

-- Group: common noun followed by an adjective.
groupNgrams ((x,"NC",_):(y,"ADJ",yy):xs) = groupNgrams ((x <> " " <> y, "NC", yy):xs)
-- /!\ The tag is sometimes "N" instead of "NC" (reason unknown); the
-- merged triple is tagged "NC" here.
groupNgrams ((x,"N",_):(y,"ADJ",yy):xs) = groupNgrams ((x <> " " <> y, "NC", yy):xs)

-- Group: adjective followed by a common noun.
groupNgrams ((x,"ADJ",_):(y,"NC",yy):xs) = groupNgrams ((x <> " " <> y, "NC", yy):xs)
-- /!\ The tag is sometimes "N" instead of "NC" (reason unknown); the
-- merged triple is tagged "NC" here.
groupNgrams ((x,"ADJ",_):(y,"N",yy):xs) = groupNgrams ((x <> " " <> y, "NC", yy):xs)

-- Group: consecutive triples whose third component is "PERSON" (resp.
-- "ORGANIZATION"); the merged triple keeps the second triple's tag.
groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)

-- Fallback when none of the rules above applies: keep the head triple
-- as is and continue with the rest of the list.
groupNgrams (x:xs) = x : groupNgrams xs
-- | One-element list holding a sample French sentence (presumably meant
-- as input for trying out the n-gram functions).
--
-- The explicit signature matters: with @OverloadedStrings@ the string
-- literal is polymorphic, and a signature-less top-level binding would
-- leave its type ambiguous.
textTest :: [Text]
textTest = [ "L'heure d'arrivée des coureurs dépend de la météo du jour."]