]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Hal.hs
[scrapers] fix limit with MAX_DOCS_SCRAPERS
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Hal.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Hal
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Text.Corpus.API.Hal
13 where
14
15 import Conduit
16 import Data.Either
17 import Data.Maybe
18 import Data.Text (Text, pack, intercalate)
19 import Servant.Client (ClientError)
20
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
23 import Gargantext.Prelude
24 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
25 import qualified HAL as HAL
26 import qualified HAL.Client as HAL
27 import qualified HAL.Doc.Corpus as HAL
28
29 get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
30 get la q ml = do
31 eDocs <- HAL.getMetadataWith q (Just 0) ml
32 either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
33
34 getC :: Lang -> Text -> Maybe Integer -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
35 getC la q ml = do
36 eRes <- HAL.getMetadataWithC q (Just 0) ml
37 pure $ (\(len, docsC) -> (len, docsC .| mapMC (toDoc' la))) <$> eRes
38 -- case eRes of
39 -- Left err -> panic $ pack $ show err
40 -- Right (len, docsC) -> pure (len, docsC .| mapMC (toDoc' la))
41
42 toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
43 toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
44 (utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") Just d)
45 pure $ HyperdataDocument { _hd_bdd = Just "Hal"
46 , _hd_doi = Just $ pack $ show i
47 , _hd_url = Nothing
48 , _hd_uniqId = Nothing
49 , _hd_uniqIdBdd = Nothing
50 , _hd_page = Nothing
51 , _hd_title = Just $ intercalate " " t
52 , _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" aus
53 , _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" $ affs <> map (cs . show) struct_id
54 , _hd_source = Just $ maybe "Nothing" identity s
55 , _hd_abstract = Just $ intercalate " " ab
56 , _hd_publication_date = fmap (pack . show) utctime
57 , _hd_publication_year = pub_year
58 , _hd_publication_month = pub_month
59 , _hd_publication_day = pub_day
60 , _hd_publication_hour = Nothing
61 , _hd_publication_minute = Nothing
62 , _hd_publication_second = Nothing
63 , _hd_language_iso2 = Just $ (pack . show) la }
64