2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 module Gargantext.Core.Text.Corpus.API.Pubmed
17 import Control.Monad.Reader (runReaderT)
18 import Data.Either (Either)
20 import Data.Text (Text)
21 import qualified Data.Text as Text
22 import Servant.Client (ClientError)
24 import Gargantext.Prelude
25 import Gargantext.Core (Lang(..))
26 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
28 import qualified PUBMED as PubMed
29 import qualified PUBMED.Parser as PubMedDoc
30 import PUBMED.Types (Config(..))
37 -- | TODO put default pubmed query in gargantext.ini
38 -- by default: 10K docs
42 -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
-- Run PubMed.getMetadataWithC in a reader over a Config built from the
-- caller's API key; mWebEnv is left unset (Nothing).
44 eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = mAPIKey
47 , mWebEnv = Nothing })
-- Conduit stage enforcing the optional limit l: with Nothing every item is
-- passed through unchanged, with Just l' only the first l' items are kept.
48 let takeLimit = case l of
49 Nothing -> mapC identity
50 Just l' -> takeC $ fromIntegral l'
-- On success (Right) the first tuple component is returned unchanged and the
-- document stream is piped through the limit, then converted with toDoc EN.
-- A Left (ClientError) is passed through untouched by <$>.
51 pure $ (\(len, docsC) -> (len, docsC .| takeLimit .| mapC (toDoc EN))) <$> eRes
52 --either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
53 -- <$> PubMed.getMetadataWithC q l
-- | Convert one parsed PubMed record into a 'HyperdataDocument'.
--
-- The 'Lang' argument is not inspected; it is only rendered with 'show' into
-- '_hd_language_iso2'. Authors, institutes and abstract are derived with the
-- 'authors', 'institutes' and 'abstract' helpers of this module.
55 toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
56 toDoc l (PubMedDoc.PubMed { pubmed_id
-- NOTE(review): the article pattern binds four positional values
-- (t, j, as, aus); only as and aus are used in the fields visible here.
-- Presumably t and j feed fields that are not shown in this excerpt - confirm.
57 , pubmed_article = PubMedDoc.PubMedArticle t j as aus
58 , pubmed_date = PubMedDoc.PubMedDate a y m d }
59 ) = HyperdataDocument { _hd_bdd = Just "PubMed"
-- The unique id is the 'show' rendering of pubmed_id; no second id is set.
62 , _hd_uniqId = Just $ Text.pack $ show pubmed_id
63 , _hd_uniqIdBdd = Nothing
66 , _hd_authors = authors aus
67 , _hd_institutes = institutes aus
69 , _hd_abstract = abstract as
-- The full date is stored as the 'show' rendering of a; year, month and day
-- come from the other PubMedDate components (the year via fromIntegral).
70 , _hd_publication_date = Just $ Text.pack $ show a
71 , _hd_publication_year = Just $ fromIntegral y
72 , _hd_publication_month = Just m
73 , _hd_publication_day = Just d
-- No time-of-day fields are filled in.
74 , _hd_publication_hour = Nothing
75 , _hd_publication_minute = Nothing
76 , _hd_publication_second = Nothing
77 , _hd_language_iso2 = Just $ (Text.pack . show) l }
-- | Render the author list as a single 'Text', names separated by ", ".
--
-- Each author contributes "foreName lastName", built only from the name
-- parts that are actually present. Authors without any name part are
-- skipped. Returns 'Nothing' when no author yields a name (this includes the
-- empty list), in line with @institutes [] = Nothing@.
authors :: [PubMedDoc.Author] -> Maybe Text
authors au =
  case [ name | Just name <- map fullName au ] of
    []    -> Nothing
    names -> Just $ Text.intercalate ", " names
  where
    -- Combining the parts by case analysis avoids the stray " " that
    -- @foreName n <> Just " " <> lastName n@ yields whenever a part is
    -- 'Nothing' (the 'Maybe' semigroup treats 'Nothing' as neutral, so that
    -- expression is always a 'Just').
    fullName :: PubMedDoc.Author -> Maybe Text
    fullName a =
      case (PubMedDoc.foreName a, PubMedDoc.lastName a) of
        (Just f, Just l)   -> Just (f <> " " <> l)
        (Just f, Nothing)  -> Just f
        (Nothing, mLast)   -> mLast
-- | Render the authors' affiliations as a single 'Text', separated by ", ".
--
-- An empty author list gives 'Nothing'. Otherwise every ", " inside an
-- affiliation is rewritten to " - " before the affiliations are joined,
-- presumably so that the ", " separator between affiliations stays
-- unambiguous - confirm.
85 institutes :: [PubMedDoc.Author] -> Maybe Text
86 institutes [] = Nothing
87 institutes au = Just $ (Text.intercalate ", ")
88 $ (map (Text.replace ", " " - "))
90 $ map PubMedDoc.affiliation au
-- | Join the abstract fragments into one 'Text', separated by ", ".
--
-- Returns 'Nothing' for an empty list, so a document without abstract text
-- gets no abstract instead of @Just ""@ (same convention as
-- @institutes [] = Nothing@).
abstract :: [Text] -> Maybe Text
abstract []  = Nothing
abstract as' = Just $ Text.intercalate ", " as'