]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Hal.hs
[pubmed] use fixed pubmed repo, fix per page to be > 20
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Hal.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Hal
3 Description : HAL API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Text.Corpus.API.Hal
13 where
14
15 import Conduit
16 import Data.Either
17 import Data.Maybe
18 import Data.Text (Text, pack, intercalate)
19 import Servant.Client (ClientError)
20
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
23 import qualified Gargantext.Defaults as Defaults
24 import Gargantext.Prelude
25 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
26 import qualified HAL as HAL
27 import qualified HAL.Client as HAL
28 import qualified HAL.Doc.Corpus as HAL
29
-- | Query HAL and convert every returned record to a 'HyperdataDocument'.
--
-- The query text and the optional 'Integer' are forwarded unchanged to
-- 'HAL.getMetadataWith'; its middle argument is fixed to @Just 0@
-- (presumably a start offset, with the last argument a limit -- confirm
-- against the HAL client).
--
-- A failed request is not returned to the caller: the error is rendered
-- with 'show' and raised through 'panic'.
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
  response <- HAL.getMetadataWith q (Just 0) ml
  case response of
    Left err     -> panic (pack (show err))
    Right result -> mapM (toDoc' la) (HAL._docs result)
34
-- | Streaming counterpart of 'get'.
--
-- Calls 'HAL.getMetadataWithC' with the query, a fixed @Just 0@ and the
-- optional 'Integer', then extends the returned conduit so that each HAL
-- record is converted with 'toDoc'' as it flows through.  The optional
-- 'Integer' that comes back alongside the conduit is passed on untouched.
--
-- Unlike 'get', a 'ClientError' is handed back in the 'Left' branch
-- instead of being turned into a 'panic'.
getC :: Lang -> Text -> Maybe Integer -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = fmap convert <$> HAL.getMetadataWithC q (Just 0) ml
  where
    -- Keep the first component as-is and convert documents downstream.
    convert (len, rawDocs) = (len, rawDocs .| mapMC (toDoc' la))
42
-- | Convert one HAL corpus record into a 'HyperdataDocument'.
--
-- The publication date is obtained from 'Date.dateSplit'; when the record
-- has no date, 'Defaults.year' (rendered as text) is parsed instead.  The
-- function lives in 'IO' because 'Date.dateSplit' does.
--
-- Authors and institutes are joined with @", "@ between elements.  The
-- previous left fold started from the empty string and therefore emitted
-- a spurious leading @", "@ (e.g. @", Alice, Bob"@); 'intercalate' yields
-- @"Alice, Bob"@ and the empty string for an empty list.
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
  (utctime, (pub_year, pub_month, pub_day)) <-
    Date.dateSplit la (Just $ fromMaybe (pack $ show Defaults.year) d)
  pure HyperdataDocument
    { _hd_bdd                = Just "Hal"
    , _hd_doi                = Just $ pack $ show i
    , _hd_url                = Nothing
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = Just $ intercalate " " t
    , _hd_authors            = Just $ intercalate ", " aus
      -- Institutes are the affiliations followed by the shown structure ids.
    , _hd_institutes         = Just $ intercalate ", " $ affs <> map (cs . show) struct_id
      -- A missing source is stored as the literal text "Nothing"; kept
      -- as-is because downstream consumers may rely on that placeholder.
    , _hd_source             = Just $ fromMaybe "Nothing" s
    , _hd_abstract           = Just $ intercalate " " ab
    , _hd_publication_date   = fmap (pack . show) utctime
    , _hd_publication_year   = pub_year
    , _hd_publication_month  = pub_month
    , _hd_publication_day    = pub_day
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just $ (pack . show) la
    }