]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Istex.hs
Merge branch 'dev' into 131-dev-ngrams-table-db-connection-2
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Istex.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Istex
3 Description : Istex API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Istex
14 where
15
16 import Data.Either (Either(..))
17 import Data.List (concat)
18 import Data.Maybe
19 import Data.Text (Text, pack)
20
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
23 import qualified Gargantext.Defaults as Defaults
24 import Gargantext.Prelude
25 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
26 import qualified ISTEX as ISTEX
27 import qualified ISTEX.Client as ISTEX
28
29
-- | Query the ISTEX API with @q@ and convert the returned hits into
-- 'HyperdataDocument's tagged with the language @la@.
--
-- The query text is suffixed with @" abstract:*"@ before being sent.
-- The third argument (a result limit) is accepted but currently ignored.
--
-- If the ISTEX client returns an error, the action 'panic's with the shown
-- error.
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q _ml = do
  -- The "scroll" argument expects a "d/h/m/s/ms" time interval; "1m" is sent.
  -- NOTE(review): this was previously described as "1 month", but "m" in that
  -- unit list looks like minutes -- confirm against the ISTEX API.
  eDocs <- ISTEX.getMetadataScroll (q <> " abstract:*") "1m" Nothing 0
  printDebug "[Istex.get] will print length" (0 :: Int)
  case eDocs of
    Left err -> panic . pack . show $ err
    Right docs -> do
      printDebug "[Istex.get] length docs" $ length (ISTEX._documents_hits docs)
      toDoc' la docs
52
-- | Convert every hit of an ISTEX response into a 'HyperdataDocument',
-- passing @la@ as the language for each conversion.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la = mapM (toDoc la) . ISTEX._documents_hits
56
-- | Convert a single ISTEX document into a 'HyperdataDocument'.
--
-- The publication date fields come from 'Date.dateSplit', which is given the
-- shown ISTEX date, or the shown 'Defaults.year' when the document has no
-- date. Author names, author affiliations and source titles are each joined
-- with @", "@ (no leading separator; an empty list yields @""@).
--
-- TODO remove dateSplit here
-- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
  (utctime, (pub_year, pub_month, pub_day)) <-
    Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) (Just . pack . show) d)
  pure $ HyperdataDocument
    { _hd_bdd                = Just "Istex"
    , _hd_doi                = Just i
    , _hd_url                = Nothing
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = t
    , _hd_authors            = Just $ commaSep (map ISTEX._author_name a)
    , _hd_institutes         = Just $ commaSep (concat (map ISTEX._author_affiliations a))
    , _hd_source             = Just $ commaSep (mapMaybe ISTEX._source_title s)
    , _hd_abstract           = ab
    , _hd_publication_date   = fmap (pack . show) utctime
    , _hd_publication_year   = pub_year
    , _hd_publication_month  = pub_month
    , _hd_publication_day    = pub_day
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just $ (pack . show) la
    }
  where
    -- Join the pieces with ", ". The fold is seeded with the first element
    -- rather than "" so that the result does not start with a separator.
    commaSep :: [Text] -> Text
    commaSep []       = ""
    commaSep (x : xs) = foldl (\acc y -> acc <> ", " <> y) x xs