]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Istex.hs
Merge branch 'dev' into 111-dev-refactor-text-corpus-api-with-conduit
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Istex.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Istex
3 Description : ISTEX API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Istex
14 where
15
16 import Data.List (concat)
17 import Data.Maybe
18 import Data.Text (Text, pack)
19
20 import Gargantext.Core (Lang(..))
21 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
22 import Gargantext.Prelude
23 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
24 import qualified ISTEX as ISTEX
25 import qualified ISTEX.Client as ISTEX
26
-- | Query ISTEX for documents matching @q@ and convert the hits to
-- 'HyperdataDocument's tagged with language @la@.
--
-- @ml@ is forwarded (converted with 'fromIntegral') as the optional second
-- argument of 'ISTEX.getMetadataWith' (presumably a result limit; confirm
-- against the ISTEX client).
--
-- When the client returns a 'Left', the shown error is passed to 'panic'.
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
  result <- ISTEX.getMetadataWith q (fmap fromIntegral ml)
  case result of
    Left err    -> panic (pack (show err))
    Right found -> toDoc' la found
31
-- | Convert every hit of an ISTEX result set into a 'HyperdataDocument',
-- using 'toDoc' with the given language.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la docs' = mapM (toDoc la) hits
  where
    -- printDebug "ISTEX" (ISTEX._documents_total docs')
    hits = ISTEX._documents_hits docs'
36
-- | Convert a single ISTEX document into a 'HyperdataDocument'.
--
-- The publication date fields come from 'Date.dateSplit' applied to the
-- document's date; when the document has no date, the literal @"2019"@ is
-- used instead.  Authors, affiliations and source titles are each joined
-- into one comma-separated value (an empty list yields @Just ""@).
--
-- TODO remove dateSplit here
-- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
  (utctime, (pub_year, pub_month, pub_day)) <-
    Date.dateSplit la (Just (maybe "2019" (pack . show) d))
  pure $ HyperdataDocument
    { _hd_bdd                = Just "Istex"
    , _hd_doi                = Just i
    , _hd_url                = Nothing
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = t
    , _hd_authors            = Just $ joinComma (map ISTEX._author_name a)
    , _hd_institutes         = Just $ joinComma (concat (map ISTEX._author_affiliations a))
    , _hd_source             = Just $ joinComma (mapMaybe ISTEX._source_title s)
    , _hd_abstract           = ab
    , _hd_publication_date   = fmap (pack . show) utctime
    , _hd_publication_year   = pub_year
    , _hd_publication_month  = pub_month
    , _hd_publication_day    = pub_day
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just $ (pack . show) la
    }
  where
    -- Join with ", " between elements only.  Seeding the fold with the
    -- first element (rather than with "") avoids a leading ", " in the
    -- result.
    joinComma []       = ""
    joinComma (x : xs) = foldl (\acc y -> acc <> ", " <> y) x xs
61