Git — Sourcephile - gargantext.git/blob - test/Ngrams.hs
[FEAT] Ngrams extractor in English with tests : ok. Need to factor pattern matching...
[gargantext.git] / test / Ngrams.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE ScopedTypeVariables #-}
3
4 import Test.Hspec
5 import Test.QuickCheck
6 import Control.Exception (evaluate)
7
8
9 import Data.Text (Text)
10
11 import Data.Gargantext.Prelude
12 import Data.Gargantext.Types.Main (Language(..))
13 import Data.Gargantext.Ngrams
14 import Data.Gargantext.Ngrams.Occurrences (parseOccurrences)
15 import Data.Gargantext.Ngrams.Parser (extractNgrams, selectNgrams)
16
17
-- | Run the ngrams-extraction spec selected by the given language.
--
-- Only 'EN' and 'FR' are handled, exactly as before; both run the same
-- spec, which is now written once instead of being duplicated per clause.
--
-- NOTE(review): the 'FR' case runs the English spec (English fixture,
-- @selectNgrams EN@, "English Language" label). This mirrors the original
-- duplicated clause and is presumably a placeholder -- confirm whether a
-- French fixture and @selectNgrams FR@ were intended.
ngramsExtractionTest :: Language -> IO ()
ngramsExtractionTest lang =
  case lang of
    EN -> hspec englishSpec
    FR -> hspec englishSpec
  where
    -- Spec shared by both language cases.
    englishSpec :: Spec
    englishSpec =
      describe "Ngrams extraction in English Language" $ do
        it "\"Of\" seperates two ngrams" $ do
          -- Index 0 is in range of the literal fixture below.
          t1 <- pm (selectNgrams EN) <$> extractNgrams (textTest !! 0)
          t1 `shouldBe`
            [ [ ("Alcoholic extract","NN","O")
              , ("Kaempferia galanga","NN","O")
              , ("analgesic activities","NN+CC","O")
              , ("antiinflammatory activities","NN+CC","O")
              , ("animal models","NN","O")
              ]
            ]

        it "Tests the conjunction of coordination in two ngrams with its adjectives" $ do
          -- Index 2 is in range of the literal fixture below.
          t2 <- pm (selectNgrams EN) <$> extractNgrams (textTest !! 2)
          t2 `shouldBe`
            [ [ ("Acute activities","NN+CC","O")
              , ("sub acute inflammatory activities","NN+CC","O")
              , ("rats","NNS","O")
              , ("carrageenan","NN","O")
              , ("paw edema","NN","O")
              , ("cotton pellet","NN","O")
              , ("granuloma models","NN","O")
              ]
            ]

    -- Fixture sentences, one list element per sentence. Only elements 0 and 2
    -- are used by the spec above; the rest are kept unchanged.
    textTest :: [String]
    textTest =
      [ "Alcoholic extract of Kaempferia galanga was tested for analgesic and antiinflammatory activities in animal models. "
      , "Three doses, 300 mg/kg, 600 mg/kg and 1200 mg/kg of the plant extract prepared as a suspension in 2 ml of 2% gum acacia were used. "
      , " Acute and sub acute inflammatory activities were studied in rats by carrageenan induced paw edema and cotton pellet induced granuloma models respectively. "
      , "In both models, the standard drug used was aspirin 100 mg/kg. "
      , "Two doses 600 mg/kg and 1200 mg/kg of plant extract exhibited significant (P<0.001) antiinflammatory activity in carrageenan model and cotton pellet granuloma model in comparison to control. "
      , "Analgesic activity was studied in rats using hot plate and tail-flick models. "
      , "Codeine 5 mg/kg and vehicle served as standard and control respectively. "
      , "The two doses of plant extract exhibited significant analgesic activity in tail flick model (P<0.001) and hot plate model (P<0.001) in comparison to control. "
      , "In conclusion K. galanga possesses antiinflammatory and analgesic activities. "
      ]
42
43
44
45
-- | Hspec suite for 'parseOccurrences'.
--
-- Each example passes a term and a context to 'parseOccurrences' and
-- expects @Right n@ for the count stated in the example.
parsersTest :: IO ()
parsersTest = hspec $
  describe "Parser for occurrences" $ do

    let needle = "internet"
        -- Look up the shared term in the given context.
        occurrencesIn context = parseOccurrences needle context

    it "returns the result of one parsing" $
      parseOccurrences "internet" "internet" `shouldBe` Right 1

    -- Context of Text should be toLower
    it "returns the result of one parsing not case sensitive" $
      parseOccurrences "Internet" "internet" `shouldBe` Right 1

    it "returns the result of one parsing after space" $
      occurrencesIn " internet" `shouldBe` Right 1

    it "returns the result of one parsing after chars" $
      occurrencesIn "l'internet" `shouldBe` Right 1

    it "returns the result of multiple parsing" $
      occurrencesIn "internet internet of things" `shouldBe` Right 2

    it "returns the result of multiple parsing separated by text" $
      occurrencesIn "internet in the internet of things" `shouldBe` Right 2

    it "returns the result of multiple parsing separated by punctuation" $
      occurrencesIn "internet. In the internet of things, internet like; internet?"
        `shouldBe` Right 4

-- Disabled spec, kept for reference:
--  describe "Parser for nodes" $ do
--    it "returns the result of one parsing after space" $ do
--      occOfCorpus 249509 "sciences" `shouldReturn` 7
82
-- | Test entry point.
--
-- Only the 'FR' ngrams-extraction run is enabled; the other suites are
-- kept below as comments, as in the original.
main :: IO ()
main =
  -- parsersTest
  -- ngramsExtractionTest EN
  ngramsExtractionTest FR
88