2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 module Gargantext.Core.Text.Corpus.API.Pubmed
18 import Control.Monad.Reader (runReaderT)
19 import Data.Either (Either)
21 import Data.Text (Text)
22 import qualified Data.Text as Text
23 import Servant.Client (ClientError)
25 import Gargantext.Prelude
26 import Gargantext.Core (Lang(..))
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
29 import qualified PUBMED as PubMed
30 import qualified PUBMED.Parser as PubMedDoc
31 import PUBMED.Types (Config(..))
38 -- | TODO: put the default PubMed query in gargantext.ini
39 -- by default: 10K docs
43 -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
-- Runs 'PubMed.getMetadataWithC' with a 'Config' record as its reader
-- environment (the API key is always wrapped in 'Just'; 'mWebEnv' is 'Nothing').
-- The result is an 'Either': a 'ClientError' is handed back to the caller
-- unchanged, because only the success side is transformed below.
45 eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = Just apiKey
48 , mWebEnv = Nothing })
-- Optional cap on the number of streamed documents: without a limit the stage
-- passes every element through unchanged ('mapC identity'); with @Just l'@ it
-- lets at most @l'@ elements through ('takeC').
49 let takeLimit = case l of
50 Nothing -> mapC identity
51 Just l' -> takeC $ fromIntegral l'
-- On success the first component of the pair is forwarded as-is (it is not
-- adjusted to the limit), and the conduit is extended with the cap followed by
-- the conversion of each PubMed record to a 'HyperdataDocument'.
-- The language passed to 'toDoc' is hard-coded to 'EN'.
-- NOTE(review): @len@ is presumably the total hit count reported by PubMed -- confirm.
52 pure $ (\(len, docsC) -> (len, docsC .| takeLimit .| mapC (toDoc EN))) <$> eRes
-- Disabled alternative that would panic on an error instead of returning it:
53 --either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
54 -- <$> PubMed.getMetadataWithC q l
-- | Convert one parsed PubMed record into a 'HyperdataDocument'.
--
-- The record is destructured into its id, its article part and its date part;
-- the 'Lang' argument is used to fill '_hd_language_iso2'.
56 toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
57 toDoc l (PubMedDoc.PubMed { pubmed_id
58 , pubmed_article = PubMedDoc.PubMedArticle t j as aus
59 , pubmed_date = PubMedDoc.PubMedDate a y m d }
-- The source database is always recorded as "PubMed".
60 ) = HyperdataDocument { _hd_bdd = Just "PubMed"
-- The unique id is the 'show' rendering of the PubMed id, packed into 'Text'.
63 , _hd_uniqId = Just $ Text.pack $ show pubmed_id
64 , _hd_uniqIdBdd = Nothing
-- Authors and institutes are both derived from the same author list @aus@.
67 , _hd_authors = authors aus
68 , _hd_institutes = institutes aus
70 , _hd_abstract = abstract as
-- The full date value @a@ is stored via its 'show' rendering; year, month and
-- day come from the remaining 'PubMedDate' fields.
71 , _hd_publication_date = Just $ Text.pack $ show a
72 , _hd_publication_year = Just $ fromIntegral y
73 , _hd_publication_month = Just m
74 , _hd_publication_day = Just d
-- No time-of-day information is filled in.
75 , _hd_publication_hour = Nothing
76 , _hd_publication_minute = Nothing
77 , _hd_publication_second = Nothing
-- The language is stored as the 'show' rendering of the 'Lang' constructor.
78 , _hd_language_iso2 = Just $ (Text.pack . show) l }
-- | Render a list of PubMed authors as a single 'Text', with the individual
-- entries separated by @", "@.
80 authors :: [PubMedDoc.Author] -> Maybe Text
82 authors au = Just $ (Text.intercalate ", ")
-- Each author contributes @foreName <> Just " " <> lastName@, combined inside
-- 'Maybe'. NOTE(review): with the standard 'Semigroup' instance for 'Maybe'
-- the middle @Just " "@ makes this a 'Just' even when a name part is missing,
-- which would leave a leading or trailing space -- confirm this is intended.
84 $ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
-- | Render the affiliations of a list of PubMed authors as a single 'Text'.
--
-- Returns 'Nothing' for an empty author list; otherwise the affiliations are
-- joined with @", "@.
86 institutes :: [PubMedDoc.Author] -> Maybe Text
87 institutes [] = Nothing
88 institutes au = Just $ (Text.intercalate ", ")
-- Every @", "@ inside a single affiliation is rewritten to @" - "@ before
-- joining (presumably so that @", "@ only ever separates two affiliations).
89 $ (map (Text.replace ", " " - "))
91 $ map PubMedDoc.affiliation au
-- | Merge the pieces of an abstract into a single 'Text', separating
-- consecutive pieces with @", "@.
94 abstract :: [Text] -> Maybe Text
96 abstract as' = Just $ Text.intercalate ", " as'