2 Module : Gargantext.Core.Text.Corpus.API.Istex
3 Description : ISTEX API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 module Gargantext.Core.Text.Corpus.API.Istex
16 import Data.List (concat)
18 import Data.Text (Text, pack)
20 import Gargantext.Core (Lang(..))
21 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
22 import Gargantext.Prelude
23 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
24 import qualified ISTEX as ISTEX
25 import qualified ISTEX.Client as ISTEX
-- | Fetch document metadata through 'ISTEX.getMetadataWith' and convert
-- every returned hit into a 'HyperdataDocument'.
--
-- Arguments, in order:
--
-- * the language forwarded to the per-document conversion,
-- * the query text handed to the ISTEX client,
-- * an optional 'Integer', converted with 'fromIntegral' and forwarded to
--   the client (presumably a cap on the number of results -- confirm
--   against the ISTEX client documentation).
--
-- A 'Left' returned by the client is not recovered from: it is rendered
-- with 'show' and passed to 'panic'.
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
  docs <- ISTEX.getMetadataWith q (fromIntegral <$> ml)
  either (panic . pack . show) (toDoc' la) docs
-- | Convert every hit of an ISTEX result set ('ISTEX._documents_hits')
-- into a 'HyperdataDocument', using 'toDoc' with the given language.
-- The hits are processed with 'mapM', so the output keeps their order.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la docs' = do
  --printDebug "ISTEX" (ISTEX._documents_total docs')
  mapM (toDoc la) (ISTEX._documents_hits docs')
-- | Build a 'HyperdataDocument' from a single ISTEX document.
--
-- The document's date is rendered with 'show', packed into 'Text' and
-- split by 'Date.dateSplit' for the given language; when the document
-- carries no date the literal @"2019"@ is used instead.  Author names,
-- author affiliations and source titles are each joined with @", "@
-- between consecutive entries (no leading separator); an empty list
-- yields @Just ""@.
--
-- TODO remove dateSplit here
-- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
  -- NOTE(review): 'i', 't' and 'ab' are bound by the pattern but are not
  -- used by any field set below.  Confirm whether 'HyperdataDocument' has
  -- further fields that should be populated from them.
  (utctime, (pub_year, pub_month, pub_day)) <-
    Date.dateSplit la (Just (maybe "2019" (pack . show) d))
  pure $ HyperdataDocument
    { _hd_bdd                = Just "Istex"
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_authors            = Just $ commaSep (map ISTEX._author_name a)
    , _hd_institutes         = Just $ commaSep (concat (map ISTEX._author_affiliations a))
    , _hd_source             = Just $ commaSep (catMaybes (map ISTEX._source_title s))
    , _hd_publication_date   = fmap (pack . show) utctime
    , _hd_publication_year   = pub_year
    , _hd_publication_month  = pub_month
    , _hd_publication_day    = pub_day
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just $ (pack . show) la
    }
  where
    -- Join the pieces with ", " placed only between consecutive elements.
    -- Seeding the fold with the first element (rather than with "") is
    -- what avoids the leading ", " the previous fold produced.
    commaSep :: [Text] -> Text
    commaSep []       = ""
    commaSep (x : xs) = foldl (\acc y -> acc <> ", " <> y) x xs