2 Module : Gargantext.Core.Text.Corpus.API.Arxiv
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
13 {-# LANGUAGE ViewPatterns #-}
15 module Gargantext.Core.Text.Corpus.API.Arxiv
17 -- * Internals for testing
23 import Data.Text (Text, unpack)
24 import qualified Data.Text as Text
26 import Gargantext.Prelude
27 import Gargantext.Core (Lang(..))
28 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
29 import Gargantext.Core.Text.Corpus.Query as Corpus
30 import Gargantext.Core.Types (Term(..))
32 import qualified Arxiv as Arxiv
33 import qualified Network.Api.Arxiv as Ax
36 -- | Converts a Gargantext's generic boolean query into an Arxiv Query.
37 convertQuery :: Corpus.Query -> Ax.Query
38 convertQuery q = mkQuery (interpretQuery q transformAST)
40 mkQuery :: Maybe Ax.Expression -> Ax.Query
41 mkQuery mb_exp = Ax.Query { Ax.qExp = mb_exp
44 , Ax.qItems = Arxiv.batchSize }
46 -- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression.
47 -- It yields 'Nothing' if the AST cannot be converted into a meaningful expression.
48 transformAST :: BoolExpr Term -> Maybe Ax.Expression
49 transformAST ast = case ast of
50 BAnd sub (BConst (Negative term))
51 -- The second term become positive, so that it can be translated.
52 -> Ax.AndNot <$> (transformAST sub) <*> transformAST (BConst (Positive term))
53 BAnd term1 (BNot term2)
54 -> Ax.AndNot <$> transformAST term1 <*> transformAST term2
56 -> Ax.And <$> transformAST sub1 <*> transformAST sub2
58 -> Ax.Or <$> transformAST sub1 <*> transformAST sub2
59 BNot (BConst (Negative term))
60 -> transformAST (BConst (Positive term)) -- double negation
61 -- We can handle negatives via `ANDNOT` with itself.
63 -> Ax.AndNot <$> transformAST sub <*> transformAST sub
64 -- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
67 -- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
70 BConst (Positive (Term term))
71 -> Just $ Ax.Exp $ Ax.Abs [unpack term]
72 -- We can handle negatives via `ANDNOT` with itself.
73 BConst (Negative (Term term))
74 -> Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [unpack term]) (Ax.Exp $ Ax.Abs [unpack term])
76 -- | TODO put default pubmed query in gargantext.ini
77 -- by default: 10K docs
81 -> IO (Maybe Integer, ConduitT () HyperdataDocument IO ())
82 get la (convertQuery -> query) (fmap getLimit -> limit) = do
83 (cnt, resC) <- case limit of
84 Nothing -> Arxiv.searchAxv' query
85 (Just l) -> do (cnt, res) <- Arxiv.searchAxv' query
86 pure (cnt, res .| takeC l)
87 pure $ (Just $ fromIntegral cnt, resC .| mapC (toDoc la))
89 toDoc :: Lang -> Arxiv.Result -> HyperdataDocument
90 toDoc l (Arxiv.Result { abstract
102 ) = HyperdataDocument { _hd_bdd = Just "Arxiv"
103 , _hd_doi = Just $ Text.pack doi
104 , _hd_url = Just $ Text.pack url
105 , _hd_uniqId = Just $ Text.pack id
106 , _hd_uniqIdBdd = Nothing
108 , _hd_title = Just $ Text.pack title
109 , _hd_authors = authors aus
110 , _hd_institutes = institutes aus
111 , _hd_source = Just $ Text.pack journal
112 , _hd_abstract = Just $ Text.pack abstract
113 , _hd_publication_date = Just $ Text.pack publication_date
114 , _hd_publication_year = fromIntegral <$> year
115 , _hd_publication_month = Nothing -- TODO parse publication_date
116 , _hd_publication_day = Nothing
117 , _hd_publication_hour = Nothing
118 , _hd_publication_minute = Nothing
119 , _hd_publication_second = Nothing
120 , _hd_language_iso2 = Just $ (Text.pack . show) l }
122 authors :: [Ax.Author] -> Maybe Text
124 authors aus' = Just $ (Text.intercalate ", ")
128 institutes :: [Ax.Author] -> Maybe Text
129 institutes [] = Nothing
130 institutes aus' = Just $ (Text.intercalate ", ")
131 $ (map (Text.replace ", " " - "))