2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 module Gargantext.Core.Text.Corpus.API.Pubmed
17 import Control.Monad.Reader (runReaderT)
18 import Data.Either (Either)
20 import Data.Text (Text)
21 import qualified Data.Text as Text
22 import Servant.Client (ClientError)
24 import Gargantext.Prelude
25 import Gargantext.Core (Lang(..))
26 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
28 import qualified PUBMED as PubMed
29 import qualified PUBMED.Parser as PubMedDoc
30 import PUBMED.Types (Config(..))
37 -- | TODO put default pubmed query in gargantext.ini
38 -- by default: 10K docs
--
-- On success the result pairs a @Maybe Integer@ with a conduit that yields
-- 'HyperdataDocument' values; a failure is reported as a 'ClientError'.
42 -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
-- Run the PubMed metadata query in a Reader environment holding the client
-- configuration: the API key comes from mAPIKey and perPage is left unset.
44 eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = mAPIKey
46 , perPage = Nothing })
-- Optional limit stage: with no limit every document is passed through
-- unchanged, otherwise the stream is cut with takeC at the requested count.
47 let takeLimit = case l of
48 Nothing -> mapC identity
49 Just l' -> takeC $ fromIntegral l'
-- Only the success case is transformed: the first component is kept as is and
-- the document stream is limited, then converted with toDoc.
-- NOTE(review): the language is hard-coded to EN here.
50 pure $ (\(len, docsC) -> (len, docsC .| takeLimit .| mapC (toDoc EN))) <$> eRes
-- NOTE(review): the two lines below are commented-out code, presumably an
-- earlier implementation -- confirm whether they can be removed.
51 --either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
52 -- <$> PubMed.getMetadataWithC q l
-- | Build a 'HyperdataDocument' from a parsed PubMed record.
--
-- The 'Lang' argument is the language to record for the document; it is
-- stored in @_hd_language_iso2@ as the 'show' rendering of that value.
54 toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
-- The record pattern destructures the article and the date in place, so their
-- components are available by name on the right-hand side.
55 toDoc l (PubMedDoc.PubMed { pubmed_id
56 , pubmed_article = PubMedDoc.PubMedArticle t j as aus
57 , pubmed_date = PubMedDoc.PubMedDate a y m d }
-- The source database tag is always the fixed text "PubMed".
58 ) = HyperdataDocument { _hd_bdd = Just "PubMed"
-- The unique id is the textual ('show') form of the PubMed identifier.
61 , _hd_uniqId = Just $ Text.pack $ show pubmed_id
62 , _hd_uniqIdBdd = Nothing
-- Authors and institutes are both derived from the same author list.
65 , _hd_authors = authors aus
66 , _hd_institutes = institutes aus
68 , _hd_abstract = abstract as
-- Date: the full date is stored as the 'show' rendering of the first date
-- component, alongside separate year, month and day values.
69 , _hd_publication_date = Just $ Text.pack $ show a
70 , _hd_publication_year = Just $ fromIntegral y
71 , _hd_publication_month = Just m
72 , _hd_publication_day = Just d
-- No time-of-day information is recorded.
73 , _hd_publication_hour = Nothing
74 , _hd_publication_minute = Nothing
75 , _hd_publication_second = Nothing
76 , _hd_language_iso2 = Just $ (Text.pack . show) l }
-- | Render the author list as a single comma-separated text.
--
-- Each author contributes its fore name, a single space and its last name,
-- combined with '<>' on 'Maybe' values; the entries are joined with @", "@.
--
-- NOTE(review): 'Nothing' is the identity of '<>' on 'Maybe', so an author
-- missing one of the two names still produces an entry with a leading or
-- trailing space -- confirm this is intended.
78 authors :: [PubMedDoc.Author] -> Maybe Text
80 authors au = Just $ (Text.intercalate ", ")
82 $ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
-- | Render the authors' affiliations as a single comma-separated text.
--
-- An empty author list gives 'Nothing'. Otherwise each author's affiliation
-- is taken, every @", "@ inside an affiliation is replaced by @" - "@, and the
-- results are joined with @", "@.
84 institutes :: [PubMedDoc.Author] -> Maybe Text
85 institutes [] = Nothing
86 institutes au = Just $ (Text.intercalate ", ")
-- Presumably done so that the ", " separator used for joining stays
-- unambiguous -- confirm.
87 $ (map (Text.replace ", " " - "))
89 $ map PubMedDoc.affiliation au
-- | Join the abstract fragments into a single text, separated by @", "@.
92 abstract :: [Text] -> Maybe Text
94 abstract as' = Just $ Text.intercalate ", " as'