1 {-# LANGUAGE DerivingStrategies #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE ScopedTypeVariables #-}
4 {-# LANGUAGE TypeApplications #-}
5 module Core.Text.Corpus.Query (tests) where
10 import Gargantext.Core (Lang(..))
11 import Gargantext.Core.Text.Corpus.Query
12 import Gargantext.Core.Types
13 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
15 import System.Environment
16 import qualified Data.Text as T
17 import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
18 import qualified Gargantext.Core.Text.Corpus.API.Pubmed as Pubmed
19 import qualified Network.Api.Arxiv as Arxiv
22 import Test.Tasty.HUnit
23 import Test.Tasty.QuickCheck hiding (Positive, Negative)
26 = PubmedApiKey { _PubmedApiKey :: T.Text }
27 deriving stock (Show, Eq)
28 deriving newtype IsString
30 pubmedSettings :: IO (Maybe PubmedApiKey)
31 pubmedSettings = fmap fromString <$> lookupEnv "PUBMED_API_KEY"
34 tests = withResource pubmedSettings (const (pure ())) $ \getPubmedKey ->
35 testGroup "Boolean Query Engine" [
36 testProperty "Parses 'A OR B'" testParse01
37 , testProperty "Parses 'A AND B'" testParse02
38 , testProperty "Parses '-A'" testParse03
39 , testProperty "Parses 'NOT A'" testParse03_01
40 , testProperty "Parses 'A -B'" testParse04
41 , testProperty "Parses 'A NOT -B'" testParse04_01
42 , testProperty "Parses 'A AND B -C' (left associative)" testParse05
43 , testProperty "Parses 'A AND (B -C)' (right associative)" testParse05_01
44 , testProperty "Parses (A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)" testParse06
45 , testCase "Parses words into a single constant" testWordsIntoConst
46 , testGroup "Arxiv expression converter" [
47 testCase "It supports 'A AND B'" testArxiv01_01
48 , testCase "It supports '\"Haskell\" AND \"Agda\"'" testArxiv01_02
49 , testCase "It supports 'A OR B'" testArxiv02
50 , testCase "It supports 'A AND NOT B'" testArxiv03_01
51 , testCase "It supports 'A AND -B'" testArxiv03_02
52 , testCase "It supports 'A AND -B'" testArxiv03_02
53 , testCase "It supports 'A AND NOT (NOT B)'" testArxiv04_01
54 , testCase "It supports 'A AND NOT (NOT (NOT B))'" testArxiv04_02
55 , testCase "It supports 'A OR NOT B'" testArxiv05
56 , testCase "It supports '-A'" testArxiv06
58 , testGroup "PUBMED expression converter" [
59 testCase "It supports 'A'" testPubMed01
60 , testCase "It supports '-A'" testPubMed02_01
61 , testCase "It supports 'NOT A'" testPubMed02_02
62 , testCase "It supports 'NOT (NOT A)'" testPubMed02_03
63 , testCase "It supports '\"Haskell\" AND \"Idris\"'" testPubMed03
64 , testCase "It supports 'A OR B'" testPubMed04
66 , testGroup "PUBMED real queries (skipped if PUBMED_API_KEY env var not set)" [
67 testCase "It searches for \"Covid\"" (testPubMedCovid_01 getPubmedKey)
68 , testCase "It searches for \"Covid\" AND \"Alzheimer\"" (testPubMedCovid_02 getPubmedKey)
70 -- We skip the Arxiv tests if the PUBMED_API_KEY is not set just for conveniency, to have
71 -- only a single flow-control mechanism.
72 , testGroup "ARXIV real queries (skipped if PUBMED_API_KEY env var not set)" [
73 testCase "It searches for \"Haskell\"" (testArxivRealWorld_01 getPubmedKey)
74 , testCase "It searches for \"Haskell\" AND \"Agda\"" (testArxivRealWorld_02 getPubmedKey)
78 -- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form,
79 -- by also checking that both renders back to the initial 'RawQuery'.
80 translatesInto :: RawQuery -> BoolExpr Term -> Property
81 (translatesInto) raw boolExpr =
82 let parsed = parseQuery raw
83 expected = Right (unsafeMkQuery boolExpr)
84 in counterexample (show parsed <> " != " <> show expected) $
85 (renderQuery <$> parsed) === (renderQuery <$> expected)
87 testParse01 :: Property
88 testParse01 = "A OR B" `translatesInto` (BConst (Positive "A") `BOr` BConst (Positive "B"))
90 testParse02 :: Property
91 testParse02 = "A AND B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Positive "B"))
93 testParse03 :: Property
94 testParse03 = "-A" `translatesInto` (BConst (Negative "A"))
96 testParse03_01 :: Property
97 testParse03_01 = "NOT A" `translatesInto` (BConst (Negative "A"))
99 testParse04 :: Property
100 testParse04 = "A -B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Negative "B"))
102 -- Both 'A -B' and 'A AND -B' desugars into the same form.
103 testParse04_01 :: Property
104 testParse04_01 = "A AND -B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Negative "B"))
106 testParse05 :: Property
107 testParse05 = "A AND B -C" `translatesInto` ((BConst (Positive "A") `BAnd` BConst (Positive "B")) `BAnd` BConst (Negative "C"))
109 testParse05_01 :: Property
111 "A AND (B -C)" `translatesInto` (BConst (Positive "A") `BAnd` (BConst (Positive "B") `BAnd` BConst (Negative "C")))
113 testParse06 :: Property
115 translatesInto "(A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)"
118 ((BConst (Positive "A") `BOr` (BConst (Positive "B"))) `BOr` (BConst (Negative "C")))
120 ((BConst (Positive "D") `BOr` (BConst (Positive "E"))) `BOr` (BConst (Positive "F")))
123 ((BConst (Positive "G") `BOr` (BConst (Positive "H"))) `BOr` (BConst (Positive "I")))
127 testWordsIntoConst :: Assertion
129 let (expected :: BoolExpr Term) =
130 fromCNF (boolTreeToCNF @Term $ (BConst (Positive "The Art of Computer Programming") `BAnd` (BConst (Positive "Conceptual Mathematics"))))
131 in case parseQuery "\"The Art of Computer Programming\" AND \"Conceptual Mathematics\"" of
133 -> assertBool err False
135 -> fromCNF (getQuery x) @?= expected
137 withValidQuery :: RawQuery -> (Query -> Assertion) -> Assertion
138 withValidQuery rawQuery onValidParse = do
139 case parseQuery rawQuery of
140 Left err -> assertBool err False
141 Right x -> onValidParse x
144 testArxiv01_01 :: Assertion
145 testArxiv01_01 = withValidQuery "A AND B" $ \q ->
146 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
147 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
149 testArxiv01_02 :: Assertion
150 testArxiv01_02 = withValidQuery "\"Haskell\" AND \"Agda\"" $ \q ->
151 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
152 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["Haskell"]) ((Arxiv.Exp $ Arxiv.Abs ["Agda"]))))
154 testArxiv02 :: Assertion
155 testArxiv02 = withValidQuery "A OR B" $ \q ->
156 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
157 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
159 testArxiv03_01 :: Assertion
160 testArxiv03_01 = withValidQuery "A AND NOT B" $ \q ->
161 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
162 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
164 testArxiv03_02 :: Assertion
165 testArxiv03_02 = withValidQuery "A AND -B" $ \q ->
166 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
167 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
169 -- Double negation get turned into positive.
170 testArxiv04_01 :: Assertion
171 testArxiv04_01 = withValidQuery "A AND NOT (NOT B)" $ \q ->
172 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
173 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
175 testArxiv04_02 :: Assertion
176 testArxiv04_02 = withValidQuery "A AND NOT (NOT (NOT B))" $ \q ->
177 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
178 (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
180 testArxiv05 :: Assertion
181 testArxiv05 = withValidQuery "A OR NOT B" $ \q ->
182 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
183 (Arxiv.qExp (Arxiv.convertQuery q) == Just (
184 Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"])
185 (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["B"]) (Arxiv.Exp $ Arxiv.Abs ["B"]))
189 testArxiv06 :: Assertion
190 testArxiv06 = withValidQuery "-A" $ \q ->
191 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
192 (Arxiv.qExp (Arxiv.convertQuery q) == Just (
193 Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) (Arxiv.Exp $ Arxiv.Abs ["A"])
201 testPubMed01 :: Assertion
202 testPubMed01 = withValidQuery "A" $ \q ->
203 assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
204 (Pubmed.getESearch (Pubmed.convertQuery q) == "A")
206 testPubMed02_01 :: Assertion
207 testPubMed02_01 = withValidQuery "-A" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "NOT+A"
209 testPubMed02_02 :: Assertion
210 testPubMed02_02 = withValidQuery "NOT A" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "NOT+A"
212 testPubMed02_03 :: Assertion
213 testPubMed02_03 = withValidQuery "NOT (NOT A)" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "A"
215 testPubMed03 :: Assertion
216 testPubMed03 = withValidQuery "\"Haskell\" AND \"Idris\"" $ \q ->
217 Pubmed.getESearch (Pubmed.convertQuery q) @?= "Haskell+AND+Idris"
219 testPubMed04 :: Assertion
220 testPubMed04 = withValidQuery "A OR B" $ \q ->
221 Pubmed.getESearch (Pubmed.convertQuery q) @?= "A+OR+B"
223 testPubMedCovid_01 :: IO (Maybe PubmedApiKey) -> Assertion
224 testPubMedCovid_01 getPubmedKey = do
225 mb_key <- getPubmedKey
228 Just k -> withValidQuery "\"Covid\"" $ \query -> do
229 res <- Pubmed.get (_PubmedApiKey k) query (Just 1)
231 Left err -> fail (show err)
233 hyperDocs <- sourceToList cnd
235 [] -> fail "No documents found."
236 (x:_) -> _hd_title x @?= Just "Being a Hospice Nurse in Times of the COVID-19 Pandemic: A Phenomenological Study of Providing End-of-Life Care."
238 testPubMedCovid_02 :: IO (Maybe PubmedApiKey) -> Assertion
239 testPubMedCovid_02 getPubmedKey = do
240 mb_key <- getPubmedKey
243 Just k -> withValidQuery "\"Covid\" AND \"Alzheimer\"" $ \query -> do
244 res <- Pubmed.get (_PubmedApiKey k) query (Just 1)
246 Left err -> fail (show err)
248 hyperDocs <- sourceToList cnd
250 [] -> fail "No documents found."
251 (x:_) -> _hd_title x @?= Just "Neurodegenerative and Neurodevelopmental Diseases and the Gut-Brain Axis: The Potential of Therapeutic Targeting of the Microbiome."
253 testArxivRealWorld_01 :: IO (Maybe PubmedApiKey) -> Assertion
254 testArxivRealWorld_01 getPubmedKey = do
255 mb_key <- getPubmedKey
258 Just _ -> withValidQuery "\"Haskell\"" $ \query -> do
259 (_, cnd) <- Arxiv.get EN query (Just 1)
260 hyperDocs <- sourceToList cnd
262 [] -> fail "No documents found."
263 (x:_) -> _hd_title x @?= Just "Haskell for OCaml programmers"
265 testArxivRealWorld_02 :: IO (Maybe PubmedApiKey) -> Assertion
266 testArxivRealWorld_02 getPubmedKey = do
267 mb_key <- getPubmedKey
270 Just _ -> withValidQuery "\"Haskell\" AND \"Agda\"" $ \query -> do
271 (_, cnd) <- Arxiv.get EN query (Just 1)
272 hyperDocs <- sourceToList cnd
274 [] -> fail "No documents found."
275 (x:_) -> _hd_title x @?= Just "Toward Hole-Driven Development with Liquid Haskell"