2 Module : Gargantext.Core.Text.Corpus.API.Arxiv
3 Description : Arxiv API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
14 module Gargantext.Core.Text.Corpus.API.Arxiv
18 import Data.Either (Either(..))
20 import Data.Text (Text)
21 import qualified Data.Text as Text
22 import Servant.Client (ClientError)
24 import Gargantext.Prelude
25 import Gargantext.Core (Lang(..))
26 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
28 import qualified Arxiv as Arxiv
29 import qualified Network.Api.Arxiv as Ax
-- | Plain alias for 'Arxiv.Limit' (from the @Arxiv@ module imported above);
-- it appears as the @Maybe Limit@ argument in the signature of 'get'.
33 type Limit = Arxiv.Limit
35 -- | TODO put default arxiv query in gargantext.ini
36 -- by default: 10K docs (NOTE(review): this default is not visible in this module; confirm in the Arxiv client)
--
-- Calls 'Arxiv.apiSimpleC' with the limit @l@ and a one-element list holding
-- the query @q@ unpacked from 'Text' to 'String'. The call yields a pair
-- @(cnt, resC)@: @cnt@ is converted with 'fromIntegral' and wrapped in 'Just'
-- (presumably the number of hits; confirm against the client), and @resC@ is
-- a conduit whose elements are each passed through @toDoc la@.
--
-- The visible code only ever produces 'Right'; no 'ClientError' is built here.
-- NOTE(review): the equation head binding @la@, @q@ and @l@ is not part of
-- this excerpt.
37 get :: Lang -> Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
39 (cnt, resC) <- Arxiv.apiSimpleC l [Text.unpack q]
40 pure $ Right (Just $ fromIntegral cnt, resC .| mapC (toDoc la))
-- | Build a 'HyperdataDocument' from one 'Arxiv.Result' and a 'Lang'.
--
-- Field mapping visible in this excerpt:
--
-- * @_hd_bdd@ is the constant string Arxiv.
-- * @_hd_uniqId@, @_hd_title@, @_hd_source@, @_hd_abstract@ and
--   @_hd_publication_date@ are the result's @doi@, @title@, @journal@,
--   @abstract@ and @publication_date@, each converted with 'Text.pack'.
-- * @_hd_authors@ and @_hd_institutes@ are computed from @aus@ by 'authors'
--   and 'institutes'.
-- * @_hd_publication_year@ is @year@ mapped over with 'fromIntegral'.
-- * @_hd_uniqIdBdd@ and the month, day, hour, minute and second fields are
--   always 'Nothing'.
-- * @_hd_language_iso2@ is the 'show' rendering of the 'Lang' argument,
--   packed into 'Text'.
--
-- NOTE(review): most of the record pattern and a few record fields are not
-- part of this excerpt, so the list above may be incomplete.
42 toDoc :: Lang -> Arxiv.Result -> HyperdataDocument
43 toDoc l (Arxiv.Result { abstract
54 ) = HyperdataDocument { _hd_bdd = Just "Arxiv"
57 , _hd_uniqId = Just $ Text.pack doi
58 , _hd_uniqIdBdd = Nothing
60 , _hd_title = Just $ Text.pack title
61 , _hd_authors = authors aus
62 , _hd_institutes = institutes aus
63 , _hd_source = Just $ Text.pack journal
64 , _hd_abstract = Just $ Text.pack abstract
65 , _hd_publication_date = Just $ Text.pack publication_date
66 , _hd_publication_year = fromIntegral <$> year
67 , _hd_publication_month = Nothing -- TODO parse publication_date
68 , _hd_publication_day = Nothing
69 , _hd_publication_hour = Nothing
70 , _hd_publication_minute = Nothing
71 , _hd_publication_second = Nothing
72 , _hd_language_iso2 = Just $ (Text.pack . show) l }
-- | Collapse a list of 'Ax.Author' into a single optional 'Text'.
--
-- The visible equation wraps its result in 'Just' and joins the pieces with
-- @", "@ via 'Text.intercalate'.
-- NOTE(review): the rest of the expression (how each author becomes a 'Text')
-- and any other equations are not part of this excerpt.
74 authors :: [Ax.Author] -> Maybe Text
76 authors aus = Just $ (Text.intercalate ", ")
80 institutes :: [Ax.Author] -> Maybe Text
81 institutes [] = Nothing
82 institutes aus = Just $ (Text.intercalate ", ")
83 $ (map (Text.replace ", " " - "))