]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Istex.hs
Merge branch 'ngrams-replace' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell...
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Istex.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Istex
3 Description : Istex API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Istex
14 where
15
16 import Data.Either (either)
17 import Data.List (concat)
18 import Data.Maybe
19 import Data.Maybe (catMaybes)
20 import Data.Text (Text, pack)
21
22 import Gargantext.Core (Lang(..))
23 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
24 import Gargantext.Prelude
25 import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
26 import qualified ISTEX as ISTEX
27 import qualified ISTEX.Client as ISTEX
28
-- | Fetch ISTEX metadata for the query @q@ and convert every hit into a
-- 'HyperdataDocument' via 'toDoc''.
--
-- @ml@ is converted with 'fromIntegral' and forwarded as the second
-- argument of 'ISTEX.getMetadataWith' (presumably a result limit -- confirm
-- against the ISTEX client). A 'Left' result aborts with 'panic', using the
-- 'show'n error as the message.
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
  result <- ISTEX.getMetadataWith q (fmap fromIntegral ml)
  case result of
    Left err    -> panic (pack (show err))
    Right found -> toDoc' la found
33
-- | Convert each hit of an ISTEX response into a 'HyperdataDocument',
-- running 'toDoc' on the hits in order.
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la docs' = mapM convert hits
  where
    -- printDebug "ISTEX" (ISTEX._documents_total docs')
    convert = toDoc la
    hits    = ISTEX._documents_hits docs'
38
-- | Convert a single ISTEX document into a 'HyperdataDocument' tagged
-- with the source @"Istex"@.
--
-- The publication date @d@ is rendered with 'show' and passed to
-- 'Date.dateSplit'; when it is absent, @"2019"@ is used instead.
-- Author names, author affiliations and source titles are each joined
-- with @", "@. Unlike the previous implementation, the joined text no
-- longer starts with a stray separator; an empty list still yields @""@.
--
-- TODO remove dateSplit here
-- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
  (utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (Just dateText)
  pure $ HyperdataDocument (Just "Istex")
                           (Just i)
                           Nothing
                           Nothing
                           Nothing
                           Nothing
                           t
                           (Just $ commaSep (map ISTEX._author_name a))
                           (Just $ commaSep (concat $ map ISTEX._author_affiliations a))
                           (Just $ commaSep (catMaybes $ map ISTEX._source_title s))
                           ab
                           (fmap (pack . show) utctime)
                           pub_year
                           pub_month
                           pub_day
                           Nothing
                           Nothing
                           Nothing
                           (Just $ (pack . show) la)
  where
    -- Textual date handed to the date parser; falls back to "2019" when
    -- the document carries no publication date.
    dateText = maybe "2019" (pack . show) d

    -- Join the pieces with ", ". The fold is seeded with the first
    -- element rather than "", so no leading separator is produced.
    commaSep :: [Text] -> Text
    commaSep []           = ""
    commaSep (x : others) = foldl (\acc y -> acc <> ", " <> y) x others