2 Module : Gargantext.Core.Text.Corpus.API.Istex
3 Description : ISTEX API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 module Gargantext.Core.Text.Corpus.API.Istex
16 import Data.Either (Either(..))
17 import Data.List (concat)
19 import Data.Text (Text, pack)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
23 import qualified Gargantext.Defaults as Defaults
24 import Gargantext.Prelude
25 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
26 import qualified ISTEX as ISTEX
27 import qualified ISTEX.Client as ISTEX
-- | Query the ISTEX API and turn the returned hits into 'HyperdataDocument's.
--
-- The only live request visible here is @ISTEX.getMetadataWith q (Just 5000)@.
-- A 'Left' result aborts through 'panic' with the shown error; a 'Right'
-- result is handed to @toDoc'@ together with the language argument.
--
-- NOTE(review): the request uses a fixed @Just 5000@, and the 'Maybe Integer'
-- argument only shows up in the commented-out calls below -- confirm whether
-- ignoring the caller's limit is intentional.
30 get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
32 --docs <- ISTEX.getMetadataWith q (fromIntegral <$> ml)
33 --printDebug "[Istex.get] calling getMetadataScrollProgress for la" la
34 --printDebug "[Istex.get] calling getMetadataScrollProgress for q" q
35 --printDebug "[Istex.get] calling getMetadataScrollProgress for ml" ml
36 -- The "scroll" expects a "d/h/m/s/ms" time interval. The disabled calls below pass "1m".
-- NOTE(review): this note originally said "1 month"; given the unit list above,
-- "m" presumably means minutes -- confirm against the ISTEX API before re-enabling.
37 --eDocs <- ISTEX.getMetadataScroll q ((\_n -> pack $ "1m") <$> ml) Nothing 0 --(fromIntegral <$> ml)
38 --eDocs <- ISTEX.getMetadataScroll (q <> " abstract:*") "1m" Nothing 0 --(fromIntegral <$> ml)
39 eDocs <- ISTEX.getMetadataWith q (Just 5000)
40 printDebug "[Istex.get] will print length" (0 :: Int)
43 Right (ISTEX.Documents { _documents_hits }) -> printDebug "[Istex.get] length docs" $ length _documents_hits
44 --ISTEX.getMetadataScrollProgress q ((\_ -> pack $ "1m") <$> ml) Nothing progress errorHandler
46 Left err -> panic . pack . show $ err
47 Right docs -> toDoc' la docs
48 --pure $ either (panic . pack . show) (toDoc' la) eDocs
50 -- progress (ISTEX.ScrollResponse { _scroll_documents = ISTEX.Documents { _documents_hits }}) =
51 -- printDebug "[Istex.get] got docs: " $ length _documents_hits
52 -- errorHandler err = printDebug "[Istex.get] error" $ show err
-- | Convert every hit contained in an ISTEX response, in order, by running
-- 'toDoc' with the given language on each of them.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la docs' = mapM convert hits
  where
    -- The individual documents carried by the response.
    hits = ISTEX._documents_hits docs'
    -- Per-document conversion, specialised to the requested language.
    convert = toDoc la
56 --printDebug "ISTEX" (ISTEX._documents_total docs')
58 -- | TODO remove dateSplit here
59 -- TODO current year as default
60 toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
61 toDoc la (ISTEX.Document i t a ab d s) = do
62 --printDebug "ISTEX date" d
63 (utctime, (pub_year, pub_month, pub_day)) <-
64 Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) (Just . pack . show) d)
65 --printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day))
66 pure $ HyperdataDocument { _hd_bdd = Just "Istex"
69 , _hd_uniqId = Nothing
70 , _hd_uniqIdBdd = Nothing
73 , _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a)
74 , _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
75 , _hd_source = Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s)
77 , _hd_publication_date = fmap (pack . show) utctime
78 , _hd_publication_year = pub_year
79 , _hd_publication_month = pub_month
80 , _hd_publication_day = pub_day
81 , _hd_publication_hour = Nothing
82 , _hd_publication_minute = Nothing
83 , _hd_publication_second = Nothing
84 , _hd_language_iso2 = Just $ (pack . show) la