2 Module : Gargantext.Core.Text.Corpus.API.Hal
3 Description : HAL API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Text.Corpus.API.Hal
18 import Data.Text (Text, pack, intercalate)
20 import Gargantext.Core (Lang(..))
21 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
22 import Gargantext.Prelude
23 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
24 import qualified HAL as HAL
25 import qualified HAL.Client as HAL
26 import qualified HAL.Doc.Corpus as HAL
-- | Run a single query against the HAL client and convert every returned
-- record into a 'HyperdataDocument'.
--
-- * @la@ is forwarded to 'toDoc'' for each record.
-- * @q@ is the query text handed to 'HAL.getMetadataWith'.
-- * @ml@ is passed through as the last argument of 'HAL.getMetadataWith'
--   (presumably a result limit -- TODO confirm against the client).
--
-- The second client argument is fixed to @Just 0@ (presumably the start
-- offset -- TODO confirm).
--
-- A 'Left' from the client is not recoverable here: it is rendered with
-- 'show' and raised through 'panic'.
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
  eDocs <- HAL.getMetadataWith q (Just 0) ml
  case eDocs of
    Left err   -> panic $ pack $ show err
    Right resp -> mapM (toDoc' la) $ HAL._docs resp
-- | Streaming counterpart of 'get': instead of a list, return a conduit
-- source that yields one 'HyperdataDocument' per HAL record.
--
-- * @la@ is forwarded to 'toDoc'' for each record.
-- * @q@ is the query text handed to 'HAL.getMetadataRecursively'.
-- * @ml@ is passed through as the last argument of
--   'HAL.getMetadataRecursively' (presumably a result limit -- TODO confirm).
--
-- The conversion is attached with 'mapMC', so 'toDoc'' runs as each record
-- flows through the returned conduit rather than up front.
--
-- A 'Left' from the client is rendered with 'show' and raised through
-- 'panic'.
getC :: Lang -> Text -> Maybe Integer -> IO (ConduitT () HyperdataDocument IO ())
getC la q ml = do
  eDocs <- HAL.getMetadataRecursively q (Just 0) ml
  case eDocs of
    Left err    -> panic $ pack $ show err
    Right docsC -> pure $ docsC .| mapMC (toDoc' la)
-- | Convert one HAL record into a 'HyperdataDocument'.
--
-- * The publication date is obtained from 'Date.dateSplit'; when the record
--   carries no date the literal @"2019"@ is parsed instead.
-- * Title and abstract fragments are joined with single spaces.
-- * Authors are joined with @", "@; institutes are the affiliations followed
--   by the shown structure ids, joined the same way. No separator is emitted
--   before the first element, and an empty list yields an empty text.
-- * The language tag is the 'show' rendering of @la@.
--
-- NOTE(review): only the fields visible in the reviewed listing are set
-- below; confirm against the 'HyperdataDocument' definition that no further
-- field has to be initialised here.
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
  (utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") Just d)
  pure $ HyperdataDocument
    { _hd_bdd                = Just "Hal"
      -- The HAL record identifier is stored in the DOI slot.
    , _hd_doi                = Just $ pack $ show i
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_title              = Just $ intercalate " " t
    , _hd_authors            = Just $ intercalate ", " aus
    , _hd_institutes         = Just $ intercalate ", " $ affs <> map (cs . show) struct_id
      -- NOTE(review): a missing source is stored as the literal text
      -- "Nothing"; kept as-is because consumers may rely on it.
    , _hd_source             = Just $ maybe "Nothing" identity s
    , _hd_abstract           = Just $ intercalate " " ab
    , _hd_publication_date   = fmap (pack . show) utctime
    , _hd_publication_year   = pub_year
    , _hd_publication_month  = pub_month
    , _hd_publication_day    = pub_day
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just $ (pack . show) la
    }