]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
Merge branch 'dev' into 151-dev-pubmed-api-key
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Pubmed.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Pubmed
14 where
15
16 import Conduit
17 import Data.Either (Either)
18 import Data.Maybe
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Servant.Client (ClientError)
22
23 import Gargantext.Prelude
24 import Gargantext.Core (Lang(..))
25 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
26
27 import qualified PUBMED as PubMed
28 import qualified PUBMED.Parser as PubMedDoc
29 import PUBMED.Types (Config(..))
30
31
-- | Search expression, passed unchanged to 'PubMed.getMetadataWithC' by 'get'.
type Query = Text

-- | Local name for the PUBMED client's own 'PubMed.Limit' type
-- (presumably a cap on the number of documents fetched -- TODO confirm).
type Limit = PubMed.Limit
34
35
-- | Run a PubMed search and expose the hits as a stream of 'HyperdataDocument'.
--
-- The optional API key is wrapped in a 'Config' and handed, together with the
-- query and the optional limit, to 'PubMed.getMetadataWithC'.  A 'Left' from
-- that call is returned untouched.  On 'Right', the first tuple component is
-- passed through as-is (presumably the number of matching documents -- TODO
-- confirm against the PUBMED client) and every element of the conduit is
-- converted with @'toDoc' 'EN'@.
--
-- TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
get :: Maybe Text
    -> Query
    -> Maybe Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get apiKey query limit = do
  result <- PubMed.getMetadataWithC config query limit
  pure (asHyperdata <$> result)
  where
    config = Config { mAPIKey = apiKey }
    -- Only the stream is rewritten; the value travelling with it is left alone.
    asHyperdata (count, rawDocs) = (count, rawDocs .| mapC (toDoc EN))
47
-- | Convert one parsed PubMed record into a 'HyperdataDocument'.
--
-- The 'Lang' argument is not inspected; it is only rendered with 'show' into
-- '_hd_language_iso2'.  '_hd_bdd' is always @"PubMed"@ and '_hd_uniqId' is the
-- shown 'pubmed_id'.  DOI, URL, page, unique-id-bdd and the hour, minute and
-- second fields are always 'Nothing'.
--
-- Authors, institutes and abstract are each flattened into a single
-- comma-separated 'Text', or 'Nothing' when there is nothing to join.
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed { pubmed_id
                          , pubmed_article = PubMedDoc.PubMedArticle title journal abstracts auths
                          , pubmed_date    = PubMedDoc.PubMedDate date year month day }
        ) = HyperdataDocument { _hd_bdd                = Just "PubMed"
                              , _hd_doi                = Nothing
                              , _hd_url                = Nothing
                              , _hd_uniqId             = Just $ Text.pack $ show pubmed_id
                              , _hd_uniqIdBdd          = Nothing
                              , _hd_page               = Nothing
                              , _hd_title              = title
                              , _hd_authors            = authors auths
                              , _hd_institutes         = institutes auths
                              , _hd_source             = journal
                              , _hd_abstract           = abstract abstracts
                              , _hd_publication_date   = Just $ Text.pack $ show date
                              , _hd_publication_year   = Just $ fromIntegral year
                              , _hd_publication_month  = Just month
                              , _hd_publication_day    = Just day
                              , _hd_publication_hour   = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2      = Just $ (Text.pack . show) l }
  where
    -- Names of all authors that have at least one name part, joined by ", ".
    authors :: [PubMedDoc.Author] -> Maybe Text
    authors = commaSeparated . mapMaybe fullName

    -- "Fore Last", built only from the parts that are present.  Gluing the two
    -- 'Maybe's together with '<>' around @Just " "@ can never give 'Nothing',
    -- so a nameless author would slip past 'catMaybes' as a lone " " and a
    -- half-named one would carry a stray leading or trailing space.
    fullName :: PubMedDoc.Author -> Maybe Text
    fullName author =
      case catMaybes [PubMedDoc.foreName author, PubMedDoc.lastName author] of
        []    -> Nothing
        parts -> Just $ Text.unwords parts

    -- Affiliations of the authors that have one, joined by ", ".  Commas
    -- inside a single affiliation are rewritten to " - " first (presumably so
    -- that ", " only ever separates two affiliations in the joined text).
    institutes :: [PubMedDoc.Author] -> Maybe Text
    institutes = commaSeparated
               . map (Text.replace ", " " - ")
               . mapMaybe PubMedDoc.affiliation

    -- Abstract fragments joined by ", ".
    abstract :: [Text] -> Maybe Text
    abstract = commaSeparated

    -- 'Nothing' for an empty list, otherwise the elements joined by ", ".
    commaSeparated :: [Text] -> Maybe Text
    commaSeparated [] = Nothing
    commaSeparated xs = Just $ Text.intercalate ", " xs