2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 module Gargantext.Core.Text.Corpus.API.Pubmed
17 import Data.Either (Either)
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Servant.Client (ClientError)
23 import Gargantext.Prelude
24 import Gargantext.Core (Lang(..))
25 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
27 import qualified PUBMED as PubMed
28 import qualified PUBMED.Parser as PubMedDoc
29 import PUBMED.Types (Config(..))
33 type Limit = PubMed.Limit -- Local alias for the Limit type of the PUBMED client library (imported qualified as PubMed).
36 -- | TODO: put the default PubMed query in gargantext.ini.
37 -- By default: 10K docs.
41 -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
43 eRes <- PubMed.getMetadataWithC (Config { mAPIKey = mAPIKey }) q l
44 pure $ (\(len, docsC) -> (len, docsC .| mapC (toDoc EN))) <$> eRes
45 --either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
46 -- <$> PubMed.getMetadataWithC q l
48 toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
49 toDoc l (PubMedDoc.PubMed { pubmed_id
50 , pubmed_article = PubMedDoc.PubMedArticle t j as aus
51 , pubmed_date = PubMedDoc.PubMedDate a y m d }
52 ) = HyperdataDocument { _hd_bdd = Just "PubMed"
55 , _hd_uniqId = Just $ Text.pack $ show pubmed_id
56 , _hd_uniqIdBdd = Nothing
59 , _hd_authors = authors aus
60 , _hd_institutes = institutes aus
62 , _hd_abstract = abstract as
63 , _hd_publication_date = Just $ Text.pack $ show a
64 , _hd_publication_year = Just $ fromIntegral y
65 , _hd_publication_month = Just m
66 , _hd_publication_day = Just d
67 , _hd_publication_hour = Nothing
68 , _hd_publication_minute = Nothing
69 , _hd_publication_second = Nothing
70 , _hd_language_iso2 = Just $ (Text.pack . show) l }
72 authors :: [PubMedDoc.Author] -> Maybe Text
74 authors au = Just $ (Text.intercalate ", ")
76 $ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
78 institutes :: [PubMedDoc.Author] -> Maybe Text
79 institutes [] = Nothing
80 institutes au = Just $ (Text.intercalate ", ")
81 $ (map (Text.replace ", " " - "))
83 $ map PubMedDoc.affiliation au
86 abstract :: [Text] -> Maybe Text
88 abstract as' = Just $ Text.intercalate ", " as'