2 Module : Gargantext.Core.Text.Corpus.API.Arxiv
3 Description : Arxiv API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
14 module Gargantext.Core.Text.Corpus.API.Arxiv
18 import Data.Either (Either(..))
20 import Data.Text (Text)
21 import qualified Data.Text as Text
22 import Servant.Client (ClientError)
24 import Gargantext.Prelude
25 import Gargantext.Core (Lang(..))
26 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
28 import qualified Arxiv as Arxiv
29 import qualified Network.Api.Arxiv as Ax
-- | Local alias for 'Arxiv.Limit', so this module's signatures can refer to
-- the limit type unqualified. 'get' takes it wrapped in 'Maybe'.
33 type Limit = Arxiv.Limit
35 -- | TODO put default arXiv query in gargantext.ini
36 -- by default: 10K docs
-- Runs a query through 'Arxiv.apiSimpleC' and exposes the results as a
-- conduit of 'HyperdataDocument's.
--
-- The result pairs the count reported by the API (always wrapped in 'Just')
-- with a source in which every raw result is converted by 'toDoc'.
-- The visible body only ever produces 'Right'; nothing here builds a
-- 'ClientError'.
--
-- NOTE(review): the equation head binding @la@, @q@ and @l@ is not visible in
-- this excerpt; presumably they are the 'Lang', the query and the optional
-- limit, in signature order -- confirm.
37 get :: Lang -> Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
-- The query is unpacked from 'Text' and passed as a single-element list.
39 (cnt, resC) <- Arxiv.apiSimpleC l [Text.unpack q]
-- 'fromIntegral' adapts the API's count to the 'Integer' of the signature.
40 pure $ Right (Just $ fromIntegral cnt, resC .| mapC (toDoc la))
-- | Convert one 'Arxiv.Result' into a 'HyperdataDocument'.
--
-- The document is tagged with the source database @"Arxiv"@. String fields
-- taken from the result are packed into 'Text' and wrapped in 'Just'.
-- Only the publication year is filled in among the numeric date components;
-- month, day, hour, minute and second are all 'Nothing'.
--
-- NOTE(review): most of the record pattern's field list (after @abstract@) is
-- not visible in this excerpt; the names used below (@doi@, @url@, @id@,
-- @title@, @aus@, @journal@, @publication_date@, @year@) are presumably bound
-- there -- confirm.
42 toDoc :: Lang -> Arxiv.Result -> HyperdataDocument
43 toDoc l (Arxiv.Result { abstract
55 ) = HyperdataDocument { _hd_bdd = Just "Arxiv"
56 , _hd_doi = Just $ Text.pack doi
57 , _hd_url = Just $ Text.pack url
58 , _hd_uniqId = Just $ Text.pack id
59 , _hd_uniqIdBdd = Nothing
61 , _hd_title = Just $ Text.pack title
-- Authors and institutes are both derived from the same list @aus@.
62 , _hd_authors = authors aus
63 , _hd_institutes = institutes aus
64 , _hd_source = Just $ Text.pack journal
65 , _hd_abstract = Just $ Text.pack abstract
-- The date is stored as the packed string, with no parsing at this point.
66 , _hd_publication_date = Just $ Text.pack publication_date
-- The year is only converted numerically when present.
-- NOTE(review): assumes @year@ is a 'Maybe' of an integral type -- confirm.
67 , _hd_publication_year = fromIntegral <$> year
68 , _hd_publication_month = Nothing -- TODO parse publication_date
69 , _hd_publication_day = Nothing
70 , _hd_publication_hour = Nothing
71 , _hd_publication_minute = Nothing
72 , _hd_publication_second = Nothing
-- The language comes from the caller's 'Lang' argument (its 'show' rendering),
-- not from the document itself.
73 , _hd_language_iso2 = Just $ (Text.pack . show) l }
-- | Render a list of 'Ax.Author' as one optional 'Text'.
--
-- The visible equation wraps its result in 'Just' and joins the rendered
-- pieces with @", "@.
--
-- NOTE(review): the rest of this pipeline (what is extracted from each author)
-- and any other equation are not visible in this excerpt -- check the full
-- definition before relying on this description.
75 authors :: [Ax.Author] -> Maybe Text
77 authors aus' = Just $ (Text.intercalate ", ")
81 institutes :: [Ax.Author] -> Maybe Text
82 institutes [] = Nothing
83 institutes aus' = Just $ (Text.intercalate ", ")
84 $ (map (Text.replace ", " " - "))