2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 module Gargantext.Core.Text.Corpus.API.Pubmed
17 import Data.Text (Text)
18 import qualified Data.Text as Text
20 import Gargantext.Prelude
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
24 import qualified PUBMED as PubMed
25 import qualified PUBMED.Parser as PubMedDoc
-- | Alias for the PUBMED client library's own @Limit@ type. 'get' accepts it
-- wrapped in 'Maybe' and forwards it unchanged to @PubMed.getMetadataWith@.
29 type Limit = PubMed.Limit
-- | Run a PubMed query and convert every returned record with @'toDoc' EN@.
--
-- The query and the optional limit are handed unchanged to
-- @PubMed.getMetadataWith@. When that call yields a 'Left', the result is a
-- 'panic' whose message is the error text prefixed with @"CRAWL: PubMed"@.
--
-- TODO put default pubmed query in gargantext.ini
-- (the earlier note on this function said "by default: 10K docs"; that default
-- is not visible in this module -- confirm against the PUBMED client)
get :: Query -> Maybe Limit -> IO [HyperdataDocument]
get q l = do
  result <- PubMed.getMetadataWith q l
  pure $ case result of
    Left err   -> panic ("CRAWL: PubMed" <> err)
    Right docs -> map (toDoc EN) docs
-- | Convert one parsed PubMed record into a 'HyperdataDocument'.
--
-- The 'Lang' argument is stored (via 'show') in @_hd_language_iso2@. The
-- article's title and journal are carried over into @_hd_title@ and
-- @_hd_source@; authors, institutes and abstract are rendered with 'authors',
-- 'institutes' and 'abstract'. The publication date is stored both as the
-- 'show'n date value and as separate year / month / day fields; hour, minute
-- and second are left unset, as are DOI, URL, unique ids and page.
--
-- NOTE(review): @_hd_doi@, @_hd_url@, @_hd_page@, @_hd_title@ and @_hd_source@
-- are assumed to be fields of 'HyperdataDocument', and the title / journal are
-- assumed to already have the fields' @Maybe Text@ type -- confirm against the
-- record and the PUBMED parser definitions.
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
                          (PubMedDoc.PubMedDate a y m d)
        ) = HyperdataDocument { _hd_bdd = Just "PubMed"
                              , _hd_doi = Nothing
                              , _hd_url = Nothing
                              , _hd_uniqId = Nothing
                              , _hd_uniqIdBdd = Nothing
                              , _hd_page = Nothing
                              -- Title and journal were bound by the pattern
                              -- but previously discarded.
                              , _hd_title = t
                              , _hd_authors = authors aus
                              , _hd_institutes = institutes aus
                              , _hd_source = j
                              , _hd_abstract = abstract as
                              , _hd_publication_date = Just $ Text.pack $ show a
                              , _hd_publication_year = Just $ fromIntegral y
                              , _hd_publication_month = Just m
                              , _hd_publication_day = Just d
                              , _hd_publication_hour = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2 = Just $ (Text.pack . show) l }
-- | Render an article's authors as one comma-separated 'Text'.
--
-- Each author becomes @"ForeName LastName"@, built only from the name parts
-- that are present, so a missing fore name or last name does not leave a
-- stray leading or trailing space. Authors with neither part are skipped.
--
-- Returns 'Nothing' when no author name remains (this includes the empty
-- list), in line with how 'institutes' treats empty input.
authors :: [PubMedDoc.Author] -> Maybe Text
authors au = case fullNames of
  []    -> Nothing
  names -> Just $ Text.intercalate ", " names
  where
    -- Name parts that are present, fore name first; the pattern generator
    -- drops the 'Nothing' entries.
    nameParts n = [ part | Just part <- [PubMedDoc.foreName n, PubMedDoc.lastName n] ]
    -- One display name per author that has at least one name part.
    fullNames = [ Text.unwords parts | parts@(_ : _) <- map nameParts au ]
-- | Render the authors' affiliations as one comma-separated 'Text'.
--
-- Authors without an affiliation are skipped. Inside each affiliation every
-- @", "@ is rewritten to @" - "@ so that the only commas left in the result
-- are the separators between affiliations.
--
-- Returns 'Nothing' when there is no affiliation at all (this includes the
-- empty author list).
--
-- NOTE(review): assumes @PubMedDoc.affiliation@ yields @Maybe Text@, like
-- @foreName@ / @lastName@ -- confirm against the PUBMED parser.
institutes :: [PubMedDoc.Author] -> Maybe Text
institutes au = case affiliations of
  []   -> Nothing
  affs -> Just $ Text.intercalate ", " $ map (Text.replace ", " " - ") affs
  where
    -- The pattern generator drops authors whose affiliation is 'Nothing'.
    affiliations = [ aff | Just aff <- map PubMedDoc.affiliation au ]
-- | Join the abstract's text fragments into a single 'Text', separated by
-- @", "@.
--
-- Returns 'Nothing' when there are no fragments, so a document without an
-- abstract is recorded as absent rather than as an empty string (the same
-- convention 'institutes' uses for empty input).
abstract :: [Text] -> Maybe Text
abstract []  = Nothing
abstract as' = Just $ Text.intercalate ", " as'