]> Git — Sourcephile - gargantext.git/blob - src-test/Core/Text/Corpus/Query.hs
Add opt-in integration tests for Arxiv and PubMed
[gargantext.git] / src-test / Core / Text / Corpus / Query.hs
1 {-# LANGUAGE DerivingStrategies #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE ScopedTypeVariables #-}
4 {-# LANGUAGE TypeApplications #-}
5 module Core.Text.Corpus.Query (tests) where
6
7 import Data.BoolExpr
8 import Data.Conduit
9 import Data.String
10 import Gargantext.Core (Lang(..))
11 import Gargantext.Core.Text.Corpus.Query
12 import Gargantext.Core.Types
13 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
14 import Prelude
15 import System.Environment
16 import qualified Data.Text as T
17 import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
18 import qualified Gargantext.Core.Text.Corpus.API.Pubmed as Pubmed
19 import qualified Network.Api.Arxiv as Arxiv
20
21 import Test.Tasty
22 import Test.Tasty.HUnit
23 import Test.Tasty.QuickCheck hiding (Positive, Negative)
24
25 newtype PubmedApiKey
26 = PubmedApiKey { _PubmedApiKey :: T.Text }
27 deriving stock (Show, Eq)
28 deriving newtype IsString
29
30 pubmedSettings :: IO (Maybe PubmedApiKey)
31 pubmedSettings = fmap fromString <$> lookupEnv "PUBMED_API_KEY"
32
33 tests :: TestTree
34 tests = withResource pubmedSettings (const (pure ())) $ \getPubmedKey ->
35 testGroup "Boolean Query Engine" [
36 testProperty "Parses 'A OR B'" testParse01
37 , testProperty "Parses 'A AND B'" testParse02
38 , testProperty "Parses '-A'" testParse03
39 , testProperty "Parses 'NOT A'" testParse03_01
40 , testProperty "Parses 'A -B'" testParse04
41 , testProperty "Parses 'A NOT -B'" testParse04_01
42 , testProperty "Parses 'A AND B -C' (left associative)" testParse05
43 , testProperty "Parses 'A AND (B -C)' (right associative)" testParse05_01
44 , testProperty "Parses (A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)" testParse06
45 , testCase "Parses words into a single constant" testWordsIntoConst
46 , testGroup "Arxiv expression converter" [
47 testCase "It supports 'A AND B'" testArxiv01_01
48 , testCase "It supports '\"Haskell\" AND \"Agda\"'" testArxiv01_02
49 , testCase "It supports 'A OR B'" testArxiv02
50 , testCase "It supports 'A AND NOT B'" testArxiv03_01
51 , testCase "It supports 'A AND -B'" testArxiv03_02
52 , testCase "It supports 'A AND -B'" testArxiv03_02
53 , testCase "It supports 'A AND NOT (NOT B)'" testArxiv04_01
54 , testCase "It supports 'A AND NOT (NOT (NOT B))'" testArxiv04_02
55 , testCase "It supports 'A OR NOT B'" testArxiv05
56 , testCase "It supports '-A'" testArxiv06
57 ]
58 , testGroup "PUBMED expression converter" [
59 testCase "It supports 'A'" testPubMed01
60 , testCase "It supports '-A'" testPubMed02_01
61 , testCase "It supports 'NOT A'" testPubMed02_02
62 , testCase "It supports 'NOT (NOT A)'" testPubMed02_03
63 , testCase "It supports '\"Haskell\" AND \"Idris\"'" testPubMed03
64 , testCase "It supports 'A OR B'" testPubMed04
65 ]
66 , testGroup "PUBMED real queries (skipped if PUBMED_API_KEY env var not set)" [
67 testCase "It searches for \"Covid\"" (testPubMedCovid_01 getPubmedKey)
68 , testCase "It searches for \"Covid\" AND \"Alzheimer\"" (testPubMedCovid_02 getPubmedKey)
69 ]
70 -- We skip the Arxiv tests if the PUBMED_API_KEY is not set just for conveniency, to have
71 -- only a single flow-control mechanism.
72 , testGroup "ARXIV real queries (skipped if PUBMED_API_KEY env var not set)" [
73 testCase "It searches for \"Haskell\"" (testArxivRealWorld_01 getPubmedKey)
74 , testCase "It searches for \"Haskell\" AND \"Agda\"" (testArxivRealWorld_02 getPubmedKey)
75 ]
76 ]
77
78 -- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form,
79 -- by also checking that both renders back to the initial 'RawQuery'.
80 translatesInto :: RawQuery -> BoolExpr Term -> Property
81 (translatesInto) raw boolExpr =
82 let parsed = parseQuery raw
83 expected = Right (unsafeMkQuery boolExpr)
84 in counterexample (show parsed <> " != " <> show expected) $
85 (renderQuery <$> parsed) === (renderQuery <$> expected)
86
87 testParse01 :: Property
88 testParse01 = "A OR B" `translatesInto` (BConst (Positive "A") `BOr` BConst (Positive "B"))
89
90 testParse02 :: Property
91 testParse02 = "A AND B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Positive "B"))
92
93 testParse03 :: Property
94 testParse03 = "-A" `translatesInto` (BConst (Negative "A"))
95
96 testParse03_01 :: Property
97 testParse03_01 = "NOT A" `translatesInto` (BConst (Negative "A"))
98
99 testParse04 :: Property
100 testParse04 = "A -B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Negative "B"))
101
102 -- Both 'A -B' and 'A AND -B' desugars into the same form.
103 testParse04_01 :: Property
104 testParse04_01 = "A AND -B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Negative "B"))
105
106 testParse05 :: Property
107 testParse05 = "A AND B -C" `translatesInto` ((BConst (Positive "A") `BAnd` BConst (Positive "B")) `BAnd` BConst (Negative "C"))
108
109 testParse05_01 :: Property
110 testParse05_01 =
111 "A AND (B -C)" `translatesInto` (BConst (Positive "A") `BAnd` (BConst (Positive "B") `BAnd` BConst (Negative "C")))
112
113 testParse06 :: Property
114 testParse06 =
115 translatesInto "(A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)"
116 (
117 (
118 ((BConst (Positive "A") `BOr` (BConst (Positive "B"))) `BOr` (BConst (Negative "C")))
119 `BAnd`
120 ((BConst (Positive "D") `BOr` (BConst (Positive "E"))) `BOr` (BConst (Positive "F")))
121 )
122 `BAnd` BNot (
123 ((BConst (Positive "G") `BOr` (BConst (Positive "H"))) `BOr` (BConst (Positive "I")))
124 )
125 )
126
127 testWordsIntoConst :: Assertion
128 testWordsIntoConst =
129 let (expected :: BoolExpr Term) =
130 fromCNF (boolTreeToCNF @Term $ (BConst (Positive "The Art of Computer Programming") `BAnd` (BConst (Positive "Conceptual Mathematics"))))
131 in case parseQuery "\"The Art of Computer Programming\" AND \"Conceptual Mathematics\"" of
132 Left err
133 -> assertBool err False
134 Right x
135 -> fromCNF (getQuery x) @?= expected
136
137 withValidQuery :: RawQuery -> (Query -> Assertion) -> Assertion
138 withValidQuery rawQuery onValidParse = do
139 case parseQuery rawQuery of
140 Left err -> assertBool err False
141 Right x -> onValidParse x
142
143
144 testArxiv01_01 :: Assertion
145 testArxiv01_01 = withValidQuery "A AND B" $ \q ->
146 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
147 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
148
149 testArxiv01_02 :: Assertion
150 testArxiv01_02 = withValidQuery "\"Haskell\" AND \"Agda\"" $ \q ->
151 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
152 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["Haskell"]) ((Arxiv.Exp $ Arxiv.Abs ["Agda"]))))
153
154 testArxiv02 :: Assertion
155 testArxiv02 = withValidQuery "A OR B" $ \q ->
156 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
157 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
158
159 testArxiv03_01 :: Assertion
160 testArxiv03_01 = withValidQuery "A AND NOT B" $ \q ->
161 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
162 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
163
164 testArxiv03_02 :: Assertion
165 testArxiv03_02 = withValidQuery "A AND -B" $ \q ->
166 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
167 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
168
169 -- Double negation get turned into positive.
170 testArxiv04_01 :: Assertion
171 testArxiv04_01 = withValidQuery "A AND NOT (NOT B)" $ \q ->
172 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
173 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
174
175 testArxiv04_02 :: Assertion
176 testArxiv04_02 = withValidQuery "A AND NOT (NOT (NOT B))" $ \q ->
177 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
178 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
179
180 testArxiv05 :: Assertion
181 testArxiv05 = withValidQuery "A OR NOT B" $ \q ->
182 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
183 (Arxiv.qExp (Arxiv.convertQuery q) == Just (
184 Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"])
185 (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["B"]) (Arxiv.Exp $ Arxiv.Abs ["B"]))
186 )
187 )
188
189 testArxiv06 :: Assertion
190 testArxiv06 = withValidQuery "-A" $ \q ->
191 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
192 (Arxiv.qExp (Arxiv.convertQuery q) == Just (
193 Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) (Arxiv.Exp $ Arxiv.Abs ["A"])
194 )
195 )
196
197 --
198 -- PUBMED tests
199 --
200
201 testPubMed01 :: Assertion
202 testPubMed01 = withValidQuery "A" $ \q ->
203 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
204 (Pubmed.getESearch (Pubmed.convertQuery q) == "A")
205
206 testPubMed02_01 :: Assertion
207 testPubMed02_01 = withValidQuery "-A" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "NOT+A"
208
209 testPubMed02_02 :: Assertion
210 testPubMed02_02 = withValidQuery "NOT A" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "NOT+A"
211
212 testPubMed02_03 :: Assertion
213 testPubMed02_03 = withValidQuery "NOT (NOT A)" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "A"
214
215 testPubMed03 :: Assertion
216 testPubMed03 = withValidQuery "\"Haskell\" AND \"Idris\"" $ \q ->
217 Pubmed.getESearch (Pubmed.convertQuery q) @?= "Haskell+AND+Idris"
218
219 testPubMed04 :: Assertion
220 testPubMed04 = withValidQuery "A OR B" $ \q ->
221 Pubmed.getESearch (Pubmed.convertQuery q) @?= "A+OR+B"
222
223 testPubMedCovid_01 :: IO (Maybe PubmedApiKey) -> Assertion
224 testPubMedCovid_01 getPubmedKey = do
225 mb_key <- getPubmedKey
226 case mb_key of
227 Nothing -> pure ()
228 Just k -> withValidQuery "\"Covid\"" $ \query -> do
229 res <- Pubmed.get (_PubmedApiKey k) query (Just 1)
230 case res of
231 Left err -> fail (show err)
232 Right (_, cnd) -> do
233 hyperDocs <- sourceToList cnd
234 case hyperDocs of
235 [] -> fail "No documents found."
236 (x:_) -> _hd_title x @?= Just "Being a Hospice Nurse in Times of the COVID-19 Pandemic: A Phenomenological Study of Providing End-of-Life Care."
237
238 testPubMedCovid_02 :: IO (Maybe PubmedApiKey) -> Assertion
239 testPubMedCovid_02 getPubmedKey = do
240 mb_key <- getPubmedKey
241 case mb_key of
242 Nothing -> pure ()
243 Just k -> withValidQuery "\"Covid\" AND \"Alzheimer\"" $ \query -> do
244 res <- Pubmed.get (_PubmedApiKey k) query (Just 1)
245 case res of
246 Left err -> fail (show err)
247 Right (_, cnd) -> do
248 hyperDocs <- sourceToList cnd
249 case hyperDocs of
250 [] -> fail "No documents found."
251 (x:_) -> _hd_title x @?= Just "Neurodegenerative and Neurodevelopmental Diseases and the Gut-Brain Axis: The Potential of Therapeutic Targeting of the Microbiome."
252
253 testArxivRealWorld_01 :: IO (Maybe PubmedApiKey) -> Assertion
254 testArxivRealWorld_01 getPubmedKey = do
255 mb_key <- getPubmedKey
256 case mb_key of
257 Nothing -> pure ()
258 Just _ -> withValidQuery "\"Haskell\"" $ \query -> do
259 (_, cnd) <- Arxiv.get EN query (Just 1)
260 hyperDocs <- sourceToList cnd
261 case hyperDocs of
262 [] -> fail "No documents found."
263 (x:_) -> _hd_title x @?= Just "Haskell for OCaml programmers"
264
265 testArxivRealWorld_02 :: IO (Maybe PubmedApiKey) -> Assertion
266 testArxivRealWorld_02 getPubmedKey = do
267 mb_key <- getPubmedKey
268 case mb_key of
269 Nothing -> pure ()
270 Just _ -> withValidQuery "\"Haskell\" AND \"Agda\"" $ \query -> do
271 (_, cnd) <- Arxiv.get EN query (Just 1)
272 hyperDocs <- sourceToList cnd
273 case hyperDocs of
274 [] -> fail "No documents found."
275 (x:_) -> _hd_title x @?= Just "Toward Hole-Driven Development with Liquid Haskell"