]> Git — Sourcephile - gargantext.git/blob - src/Data/Gargantext/Ngrams/Lang/Fr.hs
[FEAT] Ngrams extractor in English with tests : ok. Need to factor pattern matching...
[gargantext.git] / src / Data / Gargantext / Ngrams / Lang / Fr.hs
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Data.Gargantext.Ngrams.Lang.Fr (selectNgrams, groupNgrams)
4 where
5
6 import Data.Gargantext.Prelude
7 import Data.Text (Text)
8 import Data.Monoid ((<>))
9
-- | Keep only the tokens considered n-gram candidates.
--
-- A token is a triple whose second component is matched against the tags
-- @NN@, @NNS@, @NNP@ and @NN+CC@, and whose third component is matched
-- against the labels @PERSON@, @ORGANIZATION@ and @LOCATION@.  A token is
-- retained as soon as either component matches; the first component is never
-- inspected.
--
-- The traversal itself is delegated to 'pf' from the project prelude.
-- NOTE(review): 'pf' is not defined in this file; it is presumably a
-- filter-like combinator -- confirm against "Data.Gargantext.Prelude".
selectNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
selectNgrams tokens = pf wanted tokens
  where
    -- The tag is examined first; the label is only looked at when the tag
    -- is not one of the accepted ones.
    wanted (_, tag, label) = case tag of
      "NN"    -> True
      "NNS"   -> True
      "NNP"   -> True
      "NN+CC" -> True
      _       -> case label of
        "PERSON"       -> True
        "ORGANIZATION" -> True
        "LOCATION"     -> True
        _              -> False
21
22
-- | Merge runs of adjacent tagged tokens into multi-word n-grams.
--
-- Every token is a @(text, tag, label)@ triple.  The alternatives below are
-- tried from top to bottom on the head of the list.  When one matches, the
-- merged token(s) are pushed back in front of the remaining input and the
-- scan restarts from there, so a freshly merged token can absorb further
-- neighbours.  When none matches, the head token is emitted unchanged and
-- the scan continues with the tail.
--
-- Merged texts are joined with a single space.  Unless stated otherwise the
-- merged token inherits the third component of its right-most member.
groupNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
groupNgrams tokens = case tokens of
  [] -> []

  -- JJ CC JJ JJ: fuse the two trailing adjectives (keeping the label of the
  -- first of them); the leading adjective and the conjunction are kept.
  (a, "JJ", a') : (c, "CC", c') : (b, "JJ", b') : (d, "JJ", _) : rest ->
    groupNgrams ((a, "JJ", a') : (c, "CC", c') : (glue b d, "JJ", b') : rest)

  -- JJ CC JJ NN / NNS: each adjective gets its own copy of the noun; the
  -- conjunction is dropped and both results are tagged "NN+CC".
  (a, "JJ", _) : (_, "CC", _) : (b, "JJ", _) : (n, "NN", lbl) : rest ->
    groupNgrams (shareNoun a b n lbl rest)
  (a, "JJ", _) : (_, "CC", _) : (b, "JJ", _) : (n, "NNS", lbl) : rest ->
    groupNgrams (shareNoun a b n lbl rest)

  -- Adjective followed by an adjective or a noun.
  (l, "JJ", _) : (r, "JJ", lbl) : rest  -> regroup "JJ" l r lbl rest
  (l, "JJ", _) : (r, "NN", lbl) : rest  -> regroup "NN" l r lbl rest
  (l, "JJ", _) : (r, "NNS", lbl) : rest -> regroup "NN" l r lbl rest

  -- Noun-like pairs; the result is always tagged "NN".
  (l, "NNP", _) : (r, "NN", lbl) : rest -> regroup "NN" l r lbl rest
  (l, "NN", _) : (r, "NP", lbl) : rest  -> regroup "NN" l r lbl rest
  (l, "NN", _) : (r, "NNS", lbl) : rest -> regroup "NN" l r lbl rest
  (l, "NP", _) : (r, "NP", lbl) : rest  -> regroup "NN" l r lbl rest
  (l, "NN", _) : (r, "NN", lbl) : rest  -> regroup "NN" l r lbl rest

  -- Note kept from the original source:
  --   extractNgrams "Test the antiinflammatory or analgesic activity?"
  --   [[("``","``","O"),("Test","VB","O"),("the","DT","O"),("antiinflammatory activity analgesic activity","NN","O"),("?",".","O"),("''","''","O")]]
  --   > should be (antiinflammatory activity) <> (analgesic activity)
  -- NOTE(review): extractNgrams is not defined in this section; re-check
  -- whether this output still reproduces with the "NN+CC" rules above.

  -- Noun "of" noun.
  (l, "NN", _) : ("of", "IN", _) : (r, "NN", lbl) : rest ->
    regroup "NN" (glue l "of") r lbl rest

  -- Consecutive tokens carrying the same PERSON / ORGANIZATION label; the
  -- merged token keeps the tag of the second token.
  (l, _, "PERSON") : (r, tag, "PERSON") : rest ->
    regroup tag l r "PERSON" rest
  (l, _, "ORGANIZATION") : (r, tag, "ORGANIZATION") : rest ->
    regroup tag l r "ORGANIZATION" rest

  -- Nothing to merge here: emit the token and move on.
  tok : rest -> tok : groupNgrams rest
  where
    -- Join two texts with one separating space.
    glue :: Text -> Text -> Text
    glue l r = l <> " " <> r

    -- Replace two tokens by their fusion and rescan from the fused token.
    regroup :: Text -> Text -> Text -> Text
            -> [(Text, Text, Text)] -> [(Text, Text, Text)]
    regroup tag l r lbl rest = groupNgrams ((glue l r, tag, lbl) : rest)

    -- Build the two "NN+CC" tokens for a pair of coordinated adjectives
    -- that share the noun @n@, in front of the remaining input.
    shareNoun :: Text -> Text -> Text -> Text
              -> [(Text, Text, Text)] -> [(Text, Text, Text)]
    shareNoun a b n lbl rest =
      (glue a n, "NN+CC", lbl) : (glue b n, "NN+CC", lbl) : rest
67
68
69 --textTest :: [String]
70 --textTest = [ "Alcoholic extract of Kaempferia galanga was tested for analgesic and antiinflammatory activities in animal models. "
71 -- , "Three doses, 300 mg/kg, 600 mg/kg and 1200 mg/kg of the plant extract prepared as a suspension in 2 ml of 2% gum acacia were used. "
72 -- , " Acute and sub acute inflammatory activities were studied in rats by carrageenan induced paw edema and cotton pellet induced granuloma models respectively. "
73 -- , "In both models, the standard drug used was aspirin 100 mg/kg. "
74 -- , "Two doses 600 mg/kg and 1200 mg/kg of plant extract exhibited significant (P<0.001) antiinflammatory activity in carrageenan model and cotton pellet granuloma model in comparison to control. "
75 -- , "Analgesic activity was studied in rats using hot plate and tail-flick models. "
76 -- , "Codeine 5 mg/kg and vehicle served as standard and control respectively. "
77 -- , "The two doses of plant extract exhibited significant analgesic activity in tail flick model (P<0.001) and hot plate model (P<0.001) in comparison to control. "
78 -- , "In conclusion K. galanga possesses antiinflammatory and analgesic activities. "]
79 --
80 --
81