]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Hal.hs
Merge branch 'dev' into 111-dev-refactor-text-corpus-api-with-conduit
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Hal.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Hal
3 Description : HAL API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Text.Corpus.API.Hal
13 where
14
15 import Conduit
16 import Data.Either
17 import Data.Maybe
18 import Data.Text (Text, pack, intercalate)
19
20 import Gargantext.Core (Lang(..))
21 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
22 import Gargantext.Prelude
23 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
24 import qualified HAL as HAL
25 import qualified HAL.Client as HAL
26 import qualified HAL.Doc.Corpus as HAL
27
-- | Query HAL once and convert every returned record into a
-- 'HyperdataDocument'.
--
-- The query text and the optional 'Integer' are forwarded unchanged to
-- 'HAL.getMetadataWith', with @Just 0@ as its second argument
-- (presumably a start offset and a result limit; confirm against the HAL
-- client). A failed request aborts via 'panic' with the shown error.
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
  response <- HAL.getMetadataWith q (Just 0) ml
  case response of
    Left failure -> panic (pack (show failure))
    Right result -> mapM (toDoc' la) (HAL._docs result)
32
-- | Conduit variant of 'get': builds a source of 'HyperdataDocument's from
-- the conduit returned by 'HAL.getMetadataRecursively'.
--
-- Each element is converted with 'toDoc'' as it flows through the conduit.
-- A failed request aborts via 'panic' with the shown error.
getC :: Lang -> Text -> Maybe Integer -> IO (ConduitT () HyperdataDocument IO ())
getC la q ml = do
  response <- HAL.getMetadataRecursively q (Just 0) ml
  either
    (panic . pack . show)
    (\rawDocs -> pure (rawDocs .| mapMC (toDoc' la)))
    response
39
-- | Convert one HAL corpus record into a 'HyperdataDocument'.
--
-- * The publication date is parsed with 'Date.dateSplit'; a record without
--   a date is treated as if it were dated @"2019"@.
-- * Title and abstract fragments are joined with single spaces.
-- * Authors are joined with @", "@; institutes are the affiliations followed
--   by the shown structure ids, joined the same way.
-- * A missing source is stored as the literal text @"Nothing"@.
-- * The language field is the shown 'Lang' passed by the caller, not a value
--   read from the record.
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
  (utctime, (pub_year, pub_month, pub_day)) <-
    Date.dateSplit la (Just (fromMaybe "2019" d))
  pure $ HyperdataDocument
    { _hd_bdd                = Just "Hal"
    , _hd_doi                = Just $ pack $ show i
    , _hd_url                = Nothing
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = Just $ intercalate " " t
    , _hd_authors            = Just $ commaSep aus
    , _hd_institutes         = Just $ commaSep $ affs <> map (cs . show) struct_id
    , _hd_source             = Just $ fromMaybe "Nothing" s
    , _hd_abstract           = Just $ intercalate " " ab
    , _hd_publication_date   = fmap (pack . show) utctime
    , _hd_publication_year   = pub_year
    , _hd_publication_month  = pub_month
    , _hd_publication_day    = pub_day
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just $ (pack . show) la
    }
  where
    -- Separator goes only between elements. The previous left fold seeded
    -- with "" emitted a spurious leading ", " before the first element.
    commaSep :: [Text] -> Text
    commaSep = intercalate ", "
62