]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
Merge branch 'dev' into dev-openalex
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Arxiv.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Arxiv
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
13 {-# LANGUAGE ViewPatterns #-}
14
15 module Gargantext.Core.Text.Corpus.API.Arxiv
16 ( get
17 -- * Internals for testing
18 , convertQuery
19 ) where
20
21 import Conduit
22 import Data.Maybe
23 import Data.Text (Text, unpack)
24 import qualified Data.Text as Text
25
26 import Gargantext.Prelude
27 import Gargantext.Core (Lang(..))
28 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
29 import Gargantext.Core.Text.Corpus.Query as Corpus
30 import Gargantext.Core.Types (Term(..))
31
32 import qualified Arxiv as Arxiv
33 import qualified Network.Api.Arxiv as Ax
34
35
36 -- | Converts a Gargantext's generic boolean query into an Arxiv Query.
37 convertQuery :: Corpus.Query -> Ax.Query
38 convertQuery q = mkQuery (interpretQuery q transformAST)
39 where
40 mkQuery :: Maybe Ax.Expression -> Ax.Query
41 mkQuery mb_exp = Ax.Query { Ax.qExp = mb_exp
42 , Ax.qIds = []
43 , Ax.qStart = 0
44 , Ax.qItems = Arxiv.batchSize }
45
46 -- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression.
47 -- It yields 'Nothing' if the AST cannot be converted into a meaningful expression.
48 transformAST :: BoolExpr Term -> Maybe Ax.Expression
49 transformAST ast = case ast of
50 BAnd sub (BConst (Negative term))
51 -- The second term become positive, so that it can be translated.
52 -> Ax.AndNot <$> (transformAST sub) <*> transformAST (BConst (Positive term))
53 BAnd term1 (BNot term2)
54 -> Ax.AndNot <$> transformAST term1 <*> transformAST term2
55 BAnd sub1 sub2
56 -> Ax.And <$> transformAST sub1 <*> transformAST sub2
57 BOr sub1 sub2
58 -> Ax.Or <$> transformAST sub1 <*> transformAST sub2
59 BNot (BConst (Negative term))
60 -> transformAST (BConst (Positive term)) -- double negation
61 -- We can handle negatives via `ANDNOT` with itself.
62 BNot sub
63 -> Ax.AndNot <$> transformAST sub <*> transformAST sub
64 -- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
65 BTrue
66 -> Nothing
67 -- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
68 BFalse
69 -> Nothing
70 BConst (Positive (Term term))
71 -> Just $ Ax.Exp $ Ax.Abs [unpack term]
72 -- We can handle negatives via `ANDNOT` with itself.
73 BConst (Negative (Term term))
74 -> Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [unpack term]) (Ax.Exp $ Ax.Abs [unpack term])
75
76 -- | TODO put default pubmed query in gargantext.ini
77 -- by default: 10K docs
78 get :: Lang
79 -> Corpus.Query
80 -> Maybe Corpus.Limit
81 -> IO (Maybe Integer, ConduitT () HyperdataDocument IO ())
82 get la (convertQuery -> query) (fmap getLimit -> limit) = do
83 (cnt, resC) <- case limit of
84 Nothing -> Arxiv.searchAxv' query
85 (Just l) -> do (cnt, res) <- Arxiv.searchAxv' query
86 pure (cnt, res .| takeC l)
87 pure $ (Just $ fromIntegral cnt, resC .| mapC (toDoc la))
88
89 toDoc :: Lang -> Arxiv.Result -> HyperdataDocument
90 toDoc l (Arxiv.Result { abstract
91 , authors = aus
92 --, categories
93 , doi
94 , id
95 , journal
96 --, primaryCategory
97 , publication_date
98 , title
99 --, total
100 , url
101 , year }
102 ) = HyperdataDocument { _hd_bdd = Just "Arxiv"
103 , _hd_doi = Just $ Text.pack doi
104 , _hd_url = Just $ Text.pack url
105 , _hd_uniqId = Just $ Text.pack id
106 , _hd_uniqIdBdd = Nothing
107 , _hd_page = Nothing
108 , _hd_title = Just $ Text.pack title
109 , _hd_authors = authors aus
110 , _hd_institutes = institutes aus
111 , _hd_source = Just $ Text.pack journal
112 , _hd_abstract = Just $ Text.pack abstract
113 , _hd_publication_date = Just $ Text.pack publication_date
114 , _hd_publication_year = fromIntegral <$> year
115 , _hd_publication_month = Nothing -- TODO parse publication_date
116 , _hd_publication_day = Nothing
117 , _hd_publication_hour = Nothing
118 , _hd_publication_minute = Nothing
119 , _hd_publication_second = Nothing
120 , _hd_language_iso2 = Just $ (Text.pack . show) l }
121 where
122 authors :: [Ax.Author] -> Maybe Text
123 authors [] = Nothing
124 authors aus' = Just $ (Text.intercalate ", ")
125 $ map Text.pack
126 $ map Ax.auName aus'
127
128 institutes :: [Ax.Author] -> Maybe Text
129 institutes [] = Nothing
130 institutes aus' = Just $ (Text.intercalate ", ")
131 $ (map (Text.replace ", " " - "))
132 $ map Text.pack
133 $ map Ax.auFil aus'