]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Lang/En.hs
[Structure] Ngrams -> Text.
[gargantext.git] / src / Gargantext / Text / Lang / En.hs
{-|
Module      : Gargantext.Text.Lang.En
Description : Selection and grouping rules for English n-grams
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Rules for English text: 'selectNgrams' filters tagged tokens down to
n-gram candidates, and 'groupNgrams' merges adjacent tagged tokens into
multi-word n-grams. 'textTest' provides sample sentences to try them on.
-}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Text.Lang.En (selectNgrams, groupNgrams, textTest) where
18
19 import Gargantext.Prelude
20 import Data.Text (Text)
21 import Data.Monoid ((<>))
22
-- | Keep only the tokens that qualify as n-gram candidates.
--
-- Each token is a triple whose second component is matched against a
-- fixed set of tags and whose third component is matched against a fixed
-- set of labels (the values suggest: token text, part-of-speech tag,
-- named-entity label).
--
-- A token is kept when its tag is one of @NN@, @NNS@, @NNP@, @NN+CC@, or,
-- failing that, when its label is one of @PERSON@, @ORGANIZATION@,
-- @LOCATION@. The relative order of the kept tokens is unchanged.
selectNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
selectNgrams = filter keep
  where
    -- The tag is examined first; the label only decides when the tag
    -- is not one of the accepted ones.
    keep (_, tag, label) = case tag of
      "NN"    -> True
      "NNS"   -> True
      "NNP"   -> True
      "NN+CC" -> True
      _       -> isEntity label

    isEntity "PERSON"       = True
    isEntity "ORGANIZATION" = True
    isEntity "LOCATION"     = True
    isEntity _              = False
34
35
-- | Merge adjacent tagged tokens into multi-word n-grams.
--
-- Tokens are triples @(text, tag, label)@. The rules below are tried from
-- top to bottom on the head of the list; the first one that matches
-- replaces the matched tokens by their merged form and grouping restarts
-- on the rewritten list. When no rule matches, the head token is emitted
-- unchanged and grouping continues on the tail.
--
-- Merged texts are joined with a single space. Unless stated otherwise,
-- the merged token carries the label of the last token it was built from.
--
-- NOTE(review): two rules match the tag @NP@ while the other rules use
-- @NNP@; this may be a typo for @NNP@ — confirm against the tagger's tag set.
groupNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
groupNgrams tokens = case tokens of
  [] -> []

  -- JJ CC JJ JJ: fuse the two trailing adjectives (keeping the label of
  -- the first of them) and leave the leading adjective and the
  -- conjunction in place.
  (adj1, "JJ", lab1) : (conj, "CC", labC) : (adj2, "JJ", lab2) : (adj3, "JJ", _) : rest ->
    groupNgrams ((adj1, "JJ", lab1) : (conj, "CC", labC) : (spaced adj2 adj3, "JJ", lab2) : rest)

  -- JJ CC JJ followed by a noun: attach the noun to each adjective.
  (adj1, "JJ", _) : (_, "CC", _) : (adj2, "JJ", _) : (noun, "NN", lab) : rest ->
    distribute adj1 adj2 noun lab rest
  (adj1, "JJ", _) : (_, "CC", _) : (adj2, "JJ", _) : (noun, "NNS", lab) : rest ->
    distribute adj1 adj2 noun lab rest

  -- Adjective followed by an adjective or a noun.
  (l, "JJ", _) : (r, "JJ", lab) : rest ->
    groupNgrams ((spaced l r, "JJ", lab) : rest)
  (l, "JJ", _) : (r, "NN", lab) : rest -> asNoun (spaced l r) lab rest
  (l, "JJ", _) : (r, "NNS", lab) : rest -> asNoun (spaced l r) lab rest

  -- Noun-like pairs; the result is always tagged "NN".
  (l, "NNP", _) : (r, "NN", lab) : rest -> asNoun (spaced l r) lab rest
  (l, "NN", _) : (r, "NP", lab) : rest -> asNoun (spaced l r) lab rest
  (l, "NN", _) : (r, "NNS", lab) : rest -> asNoun (spaced l r) lab rest
  (l, "NP", _) : (r, "NP", lab) : rest -> asNoun (spaced l r) lab rest
  (l, "NN", _) : (r, "NN", lab) : rest -> asNoun (spaced l r) lab rest

  -- Example and remark inherited from the original source (not
  -- re-verified against the rules in this function):
  -- extractNgrams "Test the antiinflammatory or analgesic activity?"
  -- [[("``","``","O"),("Test","VB","O"),("the","DT","O"),("antiinflammatory activity analgesic activity","NN","O"),("?",".","O"),("''","''","O")]]
  -- > should be (antiinflammatory activity) <> (analgesic activity)

  -- Noun, an "IN" token, then a noun.
  (l, "NN", _) : (prep, "IN", _) : (r, "NN", lab) : rest ->
    asNoun (spaced l (spaced prep r)) lab rest
  (l, "NN", _) : (prep, "IN", _) : (r, "NNP", lab) : rest ->
    asNoun (spaced l (spaced prep r)) lab rest

  -- Noun, an "IN" token, a "DT" token, then a noun.
  (l, "NN", _) : (prep, "IN", _) : (det, "DT", _) : (r, "NN", lab) : rest ->
    asNoun (spaced l (spaced prep (spaced det r))) lab rest
  (l, "NN", _) : (prep, "IN", _) : (det, "DT", _) : (r, "NNP", lab) : rest ->
    asNoun (spaced l (spaced prep (spaced det r))) lab rest

  -- Consecutive tokens sharing the same entity label; the tag of the
  -- second token is kept.
  (l, _, "PERSON") : (r, tag, "PERSON") : rest ->
    groupNgrams ((spaced l r, tag, "PERSON") : rest)
  (l, _, "ORGANIZATION") : (r, tag, "ORGANIZATION") : rest ->
    groupNgrams ((spaced l r, tag, "ORGANIZATION") : rest)
  (l, _, "LOCATION") : (r, tag, "LOCATION") : rest ->
    groupNgrams ((spaced l r, tag, "LOCATION") : rest)

  -- Nothing to merge at the head: keep it and move on.
  tok : rest -> tok : groupNgrams rest
  where
    -- Join two fragments with a single space.
    spaced :: Text -> Text -> Text
    spaced l r = l <> " " <> r

    -- Put a merged "NN" token at the head and keep grouping.
    asNoun :: Text -> Text -> [(Text, Text, Text)] -> [(Text, Text, Text)]
    asNoun phrase lab rest = groupNgrams ((phrase, "NN", lab) : rest)

    -- Build one "NN+CC" token per adjective, each sharing the noun and
    -- the noun's label, and keep grouping. No rule above matches the
    -- "NN+CC" tag, so only the entity-label rules could merge them further.
    distribute :: Text -> Text -> Text -> Text -> [(Text, Text, Text)] -> [(Text, Text, Text)]
    distribute adj1 adj2 noun lab rest =
      groupNgrams ((spaced adj1 noun, "NN+CC", lab) : (spaced adj2 noun, "NN+CC", lab) : rest)
86
87
-- | Sample English sentences (nine entries, each ending with a trailing
-- space) that can be used to try out the functions of this module.
textTest :: [Text]
textTest =
  [ "Alcoholic extract of Kaempferia galanga was tested for analgesic and antiinflammatory activities in animal models. "
  , "Three doses, 300 mg/kg, 600 mg/kg and 1200 mg/kg of the plant extract prepared as a suspension in 2 ml of 2% gum acacia were used. "
  , " Acute and sub acute inflammatory activities were studied in rats by carrageenan induced paw edema and cotton pellet induced granuloma models respectively. "
  , "In both models, the standard drug used was aspirin 100 mg/kg. "
  , "Two doses 600 mg/kg and 1200 mg/kg of plant extract exhibited significant (P<0.001) antiinflammatory activity in carrageenan model and cotton pellet granuloma model in comparison to control. "
  , "Analgesic activity was studied in rats using hot plate and tail-flick models. "
  , "Codeine 5 mg/kg and vehicle served as standard and control respectively. "
  , "The two doses of plant extract exhibited significant analgesic activity in tail flick model (P<0.001) and hot plate model (P<0.001) in comparison to control. "
  , "In conclusion K. galanga possesses antiinflammatory and analgesic activities. "
  ]
98
99