2 Module : Gargantext.Core.Text.Corpus.API.Istex
3 Description : Istex API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 module Gargantext.Core.Text.Corpus.API.Istex
16 import Data.Either (Either(..))
17 import Data.List (concat)
19 import Data.Text (Text, pack)
21 import qualified Data.Text as Text
22 import qualified Data.List as List
23 import Gargantext.Core (Lang(..))
24 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
25 import qualified Gargantext.Defaults as Defaults
26 import Gargantext.Prelude
27 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
28 import qualified ISTEX as ISTEX
29 import qualified ISTEX.Client as ISTEX
-- | Value handed unchanged to 'ISTEX.getMetadataWith' by 'get'.
-- NOTE(review): presumably an optional cap on the number of documents
-- requested ('Nothing' = no explicit cap) — confirm against the ISTEX client.
32 type MaxResults = Maybe Int
-- | Query ISTEX and convert the answer into 'HyperdataDocument's.
--
-- The user query @query'@ is first normalised: when it contains no @":"@
-- (i.e. 'Text.splitOn' on @":"@ yields a single piece) it is rewritten to
-- @language:\<lang la\> AND abstract:\<query'\>@. Queries that already
-- contain a @":"@ are, per the comment in the body, left as the user wrote
-- them.
--
-- The resulting query and @maxResults@ are passed to
-- 'ISTEX.getMetadataWith'. On success the number of hits is logged with
-- 'printDebug' and the documents are converted with @toDoc'@; on failure the
-- shown error is raised through 'panic'.
34 get :: Lang -> Query -> MaxResults -> IO [HyperdataDocument]
35 get la query' maxResults = do
36 --printDebug "[Istex.get] calling getMetadataScrollProgress for la" la
37 --printDebug "[Istex.get] calling getMetadataScrollProgress for q" q
38 --printDebug "[Istex.get] calling getMetadataScrollProgress for ml" ml
39 -- The "scroll" expects "d/h/m/s/ms" time interval. Let's set it to "1 month"
40 --eDocs <- ISTEX.getMetadataScroll q ((\_n -> pack $ "1m") <$> ml) Nothing 0 --(fromIntegral <$> ml)
42 -- TODO: check whether "abstract" is already in the query; if not, add it as below
43 -- eDocs <- ISTEX.getMetadataScroll (q <> " abstract:*") "1m" Nothing 0 --(fromIntegral <$> ml)
44 -- eDocs <- ISTEX.getMetadataScroll q "1m" Nothing 0 --(fromIntegral <$> ml)
46 let query = case (List.length $ Text.splitOn ":" query') == 1 of
47 -- True: the user entered a plain (default) ISTEX search, without any ":" parameter.
48 -- In that case the query is enriched with two parameters:
49 -- first, the expected language (the user has to define it in GTXT);
50 -- second, the query is restricted to the abstract.
51 True -> ("language:"<> lang la) <> " AND abstract:"<>query'
57 -- Complex ISTEX queries need parameters using ":", so we leave the query as it is;
58 -- in that case we assume the user knows what they are doing.
60 eDocs <- ISTEX.getMetadataWith query maxResults
61 -- printDebug "[Istex.get] will print length" (0 :: Int)
64 Right (ISTEX.Documents { _documents_hits }) -> printDebug "[Istex.get] length docs" $ length _documents_hits
65 --ISTEX.getMetadataScrollProgress q ((\_ -> pack $ "1m") <$> ml) Nothing progress errorHandler
67 Left err -> panic . Text.pack . show $ err
68 Right docs -> toDoc' la docs
69 --pure $ either (panic . pack . show) (toDoc' la) eDocs
71 -- progress (ISTEX.ScrollResponse { _scroll_documents = ISTEX.Documents { _documents_hits }}) =
72 -- printDebug "[Istex.get] got docs: " $ length _documents_hits
73 -- errorHandler err = printDebug "[Istex.get] error" $ show err
-- | Convert every hit of an ISTEX result set into a 'HyperdataDocument',
-- running 'toDoc' on each hit in order.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la docs' = mapM convert hits
  where
    -- The individual documents carried by the ISTEX response.
    hits = ISTEX._documents_hits docs'
    -- Per-document conversion for the requested language.
    convert = toDoc la
77 --printDebug "ISTEX" (ISTEX._documents_total docs')
79 -- | TODO remove dateSplit here
80 -- TODO current year as default
81 toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
82 toDoc la (ISTEX.Document i t a ab d s) = do
83 --printDebug "ISTEX date" d
84 (utctime, (pub_year, pub_month, pub_day)) <-
85 Date.dateSplit la (maybe (Just $ pack $ show Defaults.year) (Just . pack . show) d)
86 --printDebug "toDoc Istex" (utctime, (pub_year, pub_month, pub_day))
87 pure $ HyperdataDocument { _hd_bdd = Just "Istex"
90 , _hd_uniqId = Nothing
91 , _hd_uniqIdBdd = Nothing
94 , _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a)
95 , _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
96 , _hd_source = Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s)
98 , _hd_publication_date = fmap (pack . show) utctime
99 , _hd_publication_year = pub_year
100 , _hd_publication_month = pub_month
101 , _hd_publication_day = pub_day
102 , _hd_publication_hour = Nothing
103 , _hd_publication_minute = Nothing
104 , _hd_publication_second = Nothing
105 , _hd_language_iso2 = Just $ (pack . show) la