Git — Sourcephile - gargantext.git/blob - src/Data/Gargantext/Ngrams/Lang/Fr.hs
[NGRAMS] improving ngrams extraction with prep (of/de) respectively in eng/fr.
[gargantext.git] / src / Data / Gargantext / Ngrams / Lang / Fr.hs
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Data.Gargantext.Ngrams.Lang.Fr (selectNgrams, groupNgrams, textTest)
4 where
5
6 import Data.Gargantext.Prelude
7 import Data.Text (Text)
8 import Data.Monoid ((<>))
9
-- | Keep only the tagged tokens that are n-gram candidates.
--
-- A triple is kept when its second component is one of @"N"@, @"NC"@ or
-- @"NN+CC"@, or when its third component is one of @"PERSON"@,
-- @"ORGANIZATION"@ or @"LOCATION"@. Judging from those literals the triple is
-- presumably (text, part-of-speech tag, named-entity label) -- TODO confirm
-- against the tagger that produces the input.
--
-- NOTE(review): 'pf' comes from "Data.Gargantext.Prelude"; it is applied here
-- to a predicate and a list, so it presumably behaves like @filter@ -- confirm.
selectNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
selectNgrams = pf isCandidate
  where
    -- The second component is examined first; the third one only decides
    -- when the second is none of the accepted tags.
    isCandidate (_, tag, entity) = case tag of
      "N"     -> True
      "NC"    -> True
      "NN+CC" -> True
      _       -> case entity of
        "PERSON"       -> True
        "ORGANIZATION" -> True
        "LOCATION"     -> True
        _              -> False
20
21
-- | Merge adjacent tagged tokens into multi-word n-grams (French rules).
--
-- The first component of each triple is the text that gets concatenated.
-- Judging from the literals matched below, the second component is presumably
-- a part-of-speech tag and the third a named-entity label -- TODO confirm
-- against the tagger that produces the input.
--
-- The list is scanned from the left. The alternatives are tried in the order
-- written, so that order is part of the behaviour. When one matches, the
-- matched tokens are replaced by the merged token(s) and the scan restarts on
-- the rewritten list, which lets a freshly merged token take part in a further
-- merge. When none matches, the head token is emitted unchanged and the scan
-- continues on the tail.
groupNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
groupNgrams tokens = case tokens of
  [] -> []

  -- Disabled rule: drop determiners.
  -- (_, "DET", _) : rest -> groupNgrams rest

  -- Common noun + adjective + conjunction + adjective: the noun is
  -- distributed over both adjectives, yielding two tokens that keep the
  -- noun's third component.
  (noun, "NC", ner) : (adjA, "ADJ", _) : (_, "CC", _) : (adjB, "ADJ", _) : rest ->
    groupNgrams ((glue noun adjA, "NC", ner) : (glue noun adjB, "NC", ner) : rest)

  -- Same rule for nouns tagged "N" instead of "NC" (the tagger sometimes
  -- emits "N"; the reason is unknown). The result keeps the "N" tag.
  (noun, "N", ner) : (adjA, "ADJ", _) : (_, "CC", _) : (adjB, "ADJ", _) : rest ->
    groupNgrams ((glue noun adjA, "N", ner) : (glue noun adjB, "N", ner) : rest)

  -- Disabled rule: adjective + coordinating conjunction + adjective.
  -- (adjA, "ADJ", _) : (_, "CC", _) : (adjB, "ADJ", ner) : rest ->
  --   groupNgrams ((glue adjA adjB, "ADJ", ner) : rest)

  -- Common noun + preposition + noun (second noun tagged "NC" or "NPP",
  -- optionally preceded by a determiner in the "NPP" case). The result is
  -- tagged "NC" and takes the third component of the last noun.
  (headN, "NC", _) : (prep, "P", _) : (tailN, "NC", ner) : rest ->
    groupNgrams ((glue headN (glue prep tailN), "NC", ner) : rest)
  (headN, "NC", _) : (prep, "P", _) : (tailN, "NPP", ner) : rest ->
    groupNgrams ((glue headN (glue prep tailN), "NC", ner) : rest)
  (headN, "NC", _) : (prep, "P", _) : (det, "DET", _) : (tailN, "NPP", ner) : rest ->
    groupNgrams ((glue headN (glue prep (glue det tailN)), "NC", ner) : rest)

  -- Several successive adjectives collapse into one adjective token.
  (adjA, "ADJ", _) : (adjB, "ADJ", ner) : rest ->
    groupNgrams ((glue adjA adjB, "ADJ", ner) : rest)

  -- Noun followed by an adjective. The result is always tagged "NC", even
  -- when the noun was tagged "N", and takes the adjective's third component.
  (noun, "NC", _) : (adj, "ADJ", ner) : rest ->
    groupNgrams ((glue noun adj, "NC", ner) : rest)
  (noun, "N", _) : (adj, "ADJ", ner) : rest ->
    groupNgrams ((glue noun adj, "NC", ner) : rest)

  -- Adjective followed by a noun. The result is always tagged "NC" and takes
  -- the noun's third component.
  (adj, "ADJ", _) : (noun, "NC", ner) : rest ->
    groupNgrams ((glue adj noun, "NC", ner) : rest)
  (adj, "ADJ", _) : (noun, "N", ner) : rest ->
    groupNgrams ((glue adj noun, "NC", ner) : rest)

  -- Two neighbouring tokens with the same "PERSON" / "ORGANIZATION" label are
  -- joined; the second token's tag is kept.
  (first, _, "PERSON") : (second, tag, "PERSON") : rest ->
    groupNgrams ((glue first second, tag, "PERSON") : rest)
  (first, _, "ORGANIZATION") : (second, tag, "ORGANIZATION") : rest ->
    groupNgrams ((glue first second, tag, "ORGANIZATION") : rest)

  -- None of the rules above applies: keep the head token as it is.
  token : rest -> token : groupNgrams rest
  where
    -- Join two pieces of text with a single space.
    glue :: Text -> Text -> Text
    glue l r = l <> " " <> r
67
68
-- | A one-element list holding a sample French sentence.
--
-- NOTE(review): presumably meant as manual test input for the n-gram rules of
-- this module; nothing in this file consumes it -- confirm against callers.
textTest :: [String]
textTest =
  [ "L'heure d'arrivée des coureurs dépend de la météo du jour."
  ]
71