]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
[ngrams] some more fixes and refactorings
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Arxiv.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Arxiv
3 Description : Arxiv API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
13
14 module Gargantext.Core.Text.Corpus.API.Arxiv
15 where
16
17 import Conduit
18 import Data.Either (Either(..))
19 import Data.Maybe
20 import Data.Text (Text)
21 import qualified Data.Text as Text
22 import Servant.Client (ClientError)
23
24 import Gargantext.Prelude
25 import Gargantext.Core (Lang(..))
26 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
27
28 import qualified Arxiv as Arxiv
29 import qualified Network.Api.Arxiv as Ax
30
31
-- | Free-text search query, as handed to 'get'.
type Query = Text

-- | Limit type accepted by 'get'; it is the @Limit@ type of the @Arxiv@
-- crawler module, re-exposed under a local name.
type Limit = Arxiv.Limit
34
-- | Query the Arxiv API and stream the matching documents.
--
-- Arguments, in order:
--
--   * the 'Lang' recorded on every produced document (see 'toDoc');
--   * the 'Query', unpacked and sent as the single search term;
--   * an optional 'Limit', forwarded as-is to 'Arxiv.apiSimpleC'.
--
-- The result is always a 'Right': it pairs the count reported by
-- 'Arxiv.apiSimpleC' with a conduit yielding each result converted by
-- 'toDoc'. This function never builds a 'Left' itself.
--
-- NOTE(review): the original note here read "by default: 10K docs";
-- presumably that is what the crawler does when the limit is 'Nothing'
-- -- confirm against the @Arxiv@ library.
--
-- TODO put default arxiv query in gargantext.ini
get :: Lang -> Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get la q l = do
  -- Forward the caller's limit so that it is actually honoured instead of
  -- always requesting with 'Nothing'.
  (cnt, resC) <- Arxiv.apiSimpleC l [Text.unpack q]
  pure $ Right (Just $ fromIntegral cnt, resC .| mapC (toDoc la))
41
-- | Convert one Arxiv search result into a 'HyperdataDocument'.
--
-- The source database is tagged @"Arxiv"@. DOI, URL, identifier, title,
-- journal, abstract and publication date are copied over as 'Text'; the
-- year is converted with 'fromIntegral'. Author names and affiliations are
-- each flattened into one comma-separated 'Text', or 'Nothing' when the
-- result lists no author. The language field is the 'show'n 'Lang'.
--
-- The @categories@, @primaryCategory@ and @total@ fields of the result are
-- not used.
toDoc :: Lang -> Arxiv.Result -> HyperdataDocument
toDoc lang (Arxiv.Result { abstract
                         , authors = writers
                         , doi
                         , id = arxivId
                         , journal
                         , publication_date
                         , title
                         , url
                         , year
                         }) =
  HyperdataDocument
    { _hd_bdd                = Just "Arxiv"
    , _hd_doi                = textField doi
    , _hd_url                = textField url
    , _hd_uniqId             = textField arxivId
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = textField title
    , _hd_authors            = joined (Text.pack . Ax.auName) writers
    , _hd_institutes         = joined affiliation writers
    , _hd_source             = textField journal
    , _hd_abstract           = textField abstract
    , _hd_publication_date   = textField publication_date
    , _hd_publication_year   = fromIntegral <$> year
    , _hd_publication_month  = Nothing -- TODO parse publication_date
    , _hd_publication_day    = Nothing
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just (Text.pack (show lang))
    }
  where
    -- Wrap a plain string field as an always-present 'Text' value.
    textField = Just . Text.pack

    -- Affiliation of one author, with ", " rewritten to " - ".
    -- Presumably this keeps ", " unambiguous as the separator between
    -- authors in the joined output -- confirm.
    -- NOTE(review): affiliations are joined as-is; if 'Ax.auFil' can be
    -- empty, the output will contain empty entries -- confirm whether
    -- that is intended.
    affiliation :: Ax.Author -> Text
    affiliation = Text.replace ", " " - " . Text.pack . Ax.auFil

    -- Render every author with the given function and join the pieces
    -- with ", "; an empty author list gives 'Nothing'.
    joined :: (Ax.Author -> Text) -> [Ax.Author] -> Maybe Text
    joined render people = case people of
      [] -> Nothing
      _  -> Just (Text.intercalate ", " (map render people))