]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
[arxiv] more work on arxiv endpoint for corpus
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Arxiv.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Arxiv
3 Description : Arxiv API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
13
14 module Gargantext.Core.Text.Corpus.API.Arxiv
15 where
16
17 import Conduit
18 import Data.Either (Either(..))
19 import Data.Maybe
20 import Data.Text (Text)
21 import qualified Data.Text as Text
22 import Servant.Client (ClientError)
23
24 import Gargantext.Prelude
25 import Gargantext.Core (Lang(..))
26 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
27
28 import qualified Arxiv as Arxiv
29 import qualified Network.Api.Arxiv as Ax
30
31
-- | Search query text; 'get' unpacks it to a 'String' before handing it to the Arxiv client.
32 type Query = Text
-- | Result limit, re-exported from the Arxiv client so callers need not import it directly.
33 type Limit = Arxiv.Limit
34
-- | Query arXiv and expose the hits as a stream of 'HyperdataDocument's.
--
-- The query text is unpacked to a 'String' and passed to
-- 'Arxiv.apiSimpleC' as a one-element list, together with the optional
-- limit. The count returned by the client is converted with
-- 'fromIntegral' and always wrapped in 'Just'; every streamed result is
-- converted with 'toDoc' using the supplied language.
--
-- NOTE(review): this function only ever builds a 'Right'; nothing in it
-- maps a failure to 'ClientError'.
--
-- TODO put the default arxiv query in gargantext.ini
-- (earlier note said "by default: 10K docs" -- that default is not set
-- here; presumably it lives in the client, to be confirmed).
get :: Lang -> Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get la q l =
  Arxiv.apiSimpleC l [Text.unpack q] >>= \(hitCount, results) ->
    let docs = results .| mapC (toDoc la)
    in  pure (Right (Just (fromIntegral hitCount), docs))
41
-- | Convert one arXiv result into a 'HyperdataDocument'.
--
-- * The source database is always tagged @"Arxiv"@ and the language field
--   is the 'show' rendering of the given 'Lang'.
-- * The DOI and URL of the result are copied into '_hd_doi' / '_hd_url';
--   an empty string is treated as absent ('Nothing').
-- * The DOI also remains the unique identifier, exactly as before
--   (including when it is empty).
-- * Author names and affiliations are each joined with @", "@; an empty
--   author list yields 'Nothing' for both.
-- * Only the publication year is filled in; finer-grained date fields are
--   left as 'Nothing'.
toDoc :: Lang -> Arxiv.Result -> HyperdataDocument
toDoc l (Arxiv.Result { abstract
                      , authors = aus
                      , doi
                      , journal
                      , publication_date
                      , title
                      , url
                      , year }
        ) = HyperdataDocument { _hd_bdd = Just "Arxiv"
                              , _hd_doi = optionalText doi
                              , _hd_url = optionalText url
                              , _hd_uniqId = Just $ Text.pack doi
                              , _hd_uniqIdBdd = Nothing
                              , _hd_page = Nothing
                              , _hd_title = Just $ Text.pack title
                              , _hd_authors = authorNames aus
                              , _hd_institutes = affiliations aus
                              , _hd_source = Just $ Text.pack journal
                              , _hd_abstract = Just $ Text.pack abstract
                              , _hd_publication_date = Just $ Text.pack publication_date
                              , _hd_publication_year = fromIntegral <$> year
                              , _hd_publication_month = Nothing -- TODO parse publication_date
                              , _hd_publication_day = Nothing
                              , _hd_publication_hour = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2 = Just $ (Text.pack . show) l }
  where
    -- An empty string carries no information, so report it as missing
    -- rather than as @Just ""@.
    optionalText :: String -> Maybe Text
    optionalText [] = Nothing
    optionalText s  = Just (Text.pack s)

    -- Comma-separated author names; 'Nothing' when there are no authors.
    authorNames :: [Ax.Author] -> Maybe Text
    authorNames [] = Nothing
    authorNames xs = Just
                   $ Text.intercalate ", "
                   $ map (Text.pack . Ax.auName) xs

    -- Comma-separated affiliations, one entry per author. Any ", " inside
    -- a single affiliation is rewritten to " - " first -- presumably so
    -- the ", " separator between entries stays unambiguous.
    affiliations :: [Ax.Author] -> Maybe Text
    affiliations [] = Nothing
    affiliations xs = Just
                    $ Text.intercalate ", "
                    $ map (Text.replace ", " " - " . Text.pack . Ax.auFil) xs