]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Istex.hs
[DOC] IsTex API TODO for alter
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Istex.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Istex
3 Description : Istex API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Istex
14 where
15
import Data.Either (Either(..))
import Data.List (concat)
import Data.Maybe
import Data.Text (Text, pack)
import qualified Data.Text as T

import Gargantext.Core (Lang(..))
import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude
import qualified ISTEX as ISTEX
import qualified ISTEX.Client as ISTEX
28
29
-- | Query ISTEX with the given text and turn every hit into a
-- 'HyperdataDocument' tagged with the given language.
--
-- The third argument is currently ignored: a fixed @Just 5000@ is handed to
-- 'ISTEX.getMetadataWith' instead (presumably the maximum number of hits --
-- TODO confirm against the ISTEX client).
--
-- When the request yields a @Left@, the error is shown and raised through
-- 'panic'; no documents are returned in that case.
--
-- TODO check if @abstract@ is in the query already; if not, append
-- @" abstract:*"@ to it.
--
-- NOTE: earlier, now disabled, variants of this function went through
-- @ISTEX.getMetadataScroll@ / @ISTEX.getMetadataScrollProgress@. The "scroll"
-- call expects a "d/h/m/s/ms" time interval (e.g. @"1m"@).
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q _ml = do
  response <- ISTEX.getMetadataWith q (Just 5000)
  printDebug "[Istex.get] will print length" (0 :: Int)
  case response of
    -- Request failed: abort with the rendered error.
    Left err -> panic . pack . show $ err
    -- Request succeeded: log how many hits came back, then convert them.
    Right docs -> do
      printDebug "[Istex.get] length docs" $ length (ISTEX._documents_hits docs)
      toDoc' la docs
55
-- | Convert all hits of an ISTEX response, in order, by running 'toDoc'
-- with the given language on each of them.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la response = mapM convert hits
  where
    hits    = ISTEX._documents_hits response
    convert = toDoc la
59
-- | Convert a single ISTEX 'ISTEX.Document' into a 'HyperdataDocument'.
--
-- The document's date is rendered to text and passed through
-- 'Date.dateSplit' together with the language; when the document carries no
-- date, 'Defaults.year' is used instead. The resulting time stamp and
-- (year, month, day) triple fill the publication fields; hour, minute and
-- second are always 'Nothing'.
--
-- Author names, author affiliations and source titles are each joined with
-- @", "@ into one text. An empty list yields @Just ""@.
--
-- TODO remove dateSplit here
-- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
  -- Positional fields, named after how they are used below:
  -- i = identifier (stored as DOI), t = title, a = authors, ab = abstract,
  -- d = optional date, s = sources.
  (utctime, (pub_year, pub_month, pub_day)) <-
    Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) (Just . pack . show) d)
  pure $ HyperdataDocument
    { _hd_bdd                = Just "Istex"
    , _hd_doi                = Just i
    , _hd_url                = Nothing
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = t
    , _hd_authors            = Just $ commaSep (map ISTEX._author_name a)
    , _hd_institutes         = Just $ commaSep (concat (map ISTEX._author_affiliations a))
    , _hd_source             = Just $ commaSep (mapMaybe ISTEX._source_title s)
    , _hd_abstract           = ab
    , _hd_publication_date   = fmap (pack . show) utctime
    , _hd_publication_year   = pub_year
    , _hd_publication_month  = pub_month
    , _hd_publication_day    = pub_day
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just $ (pack . show) la
    }
  where
    -- Join with ", " placed only BETWEEN elements. The previous left fold
    -- started from "" and prepended the separator to every element, so any
    -- non-empty result began with a stray ", ".
    commaSep :: [Text] -> Text
    commaSep = T.intercalate ", "