]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Istex.hs
Merge remote-tracking branch 'origin/dev-forgot-password' into dev
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Istex.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Istex
3 Description : Istex API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Istex
14 where
15
16 import Data.Either (Either(..))
17 import Data.List (concat)
18 import Data.Maybe
19 import Data.Text (Text, pack)
20
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
23 import qualified Gargantext.Defaults as Defaults
24 import Gargantext.Prelude
25 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
26 import qualified ISTEX as ISTEX
27 import qualified ISTEX.Client as ISTEX
28
-- | Query the ISTEX API and convert every returned hit into a
-- 'HyperdataDocument'.
--
-- * @la@ – language, forwarded unchanged to @toDoc'@.
-- * @q@  – query text, passed as-is to 'ISTEX.getMetadataScroll'.
-- * @ml@ – requested maximum number of documents.  It is currently only
--   logged: it is neither forwarded to the scroll call nor used to
--   truncate the result.
--
-- Calls 'panic' with the shown error when the ISTEX client returns 'Left'.
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
  printDebug "[Istex.get] calling getMetadataScroll for la" la
  printDebug "[Istex.get] calling getMetadataScroll for q" q
  printDebug "[Istex.get] calling getMetadataScroll for ml" ml
  -- The "scroll" argument is a "d/h/m/s/ms" time interval.
  -- NOTE(review): an earlier comment described "1m" as "1 month"; with
  -- those units "m" presumably means minutes -- confirm against the ISTEX
  -- API documentation.
  eDocs <- ISTEX.getMetadataScroll q "1m" Nothing 0
  printDebug "[Istex.get] will print length" (0 :: Int)
  -- Inspect the result once: fail loudly on error, otherwise trace the
  -- number of hits before converting them.
  case eDocs of
    Left err   -> panic . pack . show $ err
    Right docs -> do
      printDebug "[Istex.get] length docs" $ length (ISTEX._documents_hits docs)
      toDoc' la docs
51
-- | Convert every hit of an ISTEX response into a 'HyperdataDocument',
-- running 'toDoc' on each hit in order with the given language.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' lang results = mapM convert hits
  where
    hits    = ISTEX._documents_hits results
    convert = toDoc lang
    -- Debug aid kept from the original:
    -- printDebug "ISTEX" (ISTEX._documents_total results)
55
-- | Convert one ISTEX document into a 'HyperdataDocument'.
--
-- The fifth constructor field (@d@) is shown and handed to
-- 'Date.dateSplit' to obtain the publication date parts; when it is
-- 'Nothing', 'Defaults.year' is used instead.  Author names, author
-- affiliations and source titles are each joined with @", "@ (no leading
-- or trailing separator; an empty collection yields @Just ""@).
--
-- TODO remove dateSplit here
-- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
  (utctime, (pub_year, pub_month, pub_day)) <-
    Date.dateSplit la (Just . pack $ maybe (show Defaults.year) show d)
  pure $ HyperdataDocument
    { _hd_bdd                = Just "Istex"
    , _hd_doi                = Just i
    , _hd_url                = Nothing
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = t
    , _hd_authors            = Just $ commaSep (map ISTEX._author_name a)
    , _hd_institutes         = Just $ commaSep (concat (map ISTEX._author_affiliations a))
    , _hd_source             = Just $ commaSep (mapMaybe ISTEX._source_title s)
    , _hd_abstract           = ab
    , _hd_publication_date   = fmap (pack . show) utctime
    , _hd_publication_year   = pub_year
    , _hd_publication_month  = pub_month
    , _hd_publication_day    = pub_day
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just $ (pack . show) la
    }
  where
    -- Join the elements with ", " placed only *between* them.  Seeding the
    -- fold with an empty string (as was done before) produced a spurious
    -- leading ", " in every non-empty result.
    commaSep xs = fromMaybe "" (foldl step Nothing xs)

    step Nothing    y = Just y
    step (Just acc) y = Just (acc <> ", " <> y)
81