]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
[API] simplify parsing of date
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Pubmed.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Pubmed
14 where
15
16 import Data.Maybe
17 import Data.Text (Text)
18 import qualified Data.Text as Text
19
20 import Gargantext.Prelude
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
23
24 import qualified PUBMED as PubMed
25 import qualified PUBMED.Parser as PubMedDoc
26
27
-- | Search text for a PubMed crawl; 'get' hands it unchanged to
-- 'PubMed.getMetadataWith'.
type Query = Text
-- | Local alias for the PUBMED package's 'PubMed.Limit'.
-- NOTE(review): presumably the maximum number of documents to fetch —
-- confirm against the PUBMED package.
type Limit = PubMed.Limit
30
31
-- | Run a PubMed crawl for the given query and convert every returned record
-- to a 'HyperdataDocument' with 'toDoc', tagging each document as 'EN'.
--
-- The optional 'Limit' is passed unchanged to 'PubMed.getMetadataWith'.
-- When the crawl yields a 'Left', the result is a call to 'panic' whose
-- message is the error text prefixed with @"CRAWL: PubMed: "@.
--
-- TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
get :: Query -> Maybe Limit -> IO [HyperdataDocument]
get q l = either crawlFailed (map (toDoc EN)) <$> PubMed.getMetadataWith q l
  where
    -- The ": " separator keeps the fixed prefix from running straight into
    -- the error text reported by the crawler.
    crawlFailed e = panic $ "CRAWL: PubMed: " <> e
37
-- | Convert one parsed PubMed record into a 'HyperdataDocument'.
--
-- The source database is always recorded as @"PubMed"@ and the given 'Lang'
-- is stored through its 'show' rendering. Title and journal are copied
-- as-is; the date fields are taken from the record's 'PubMedDoc.PubMedDate'.
-- DOI, URL, unique ids, page and the hour/minute/second fields are left as
-- 'Nothing'.
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
                          (PubMedDoc.PubMedDate a y m d)
        ) = HyperdataDocument { _hd_bdd                = Just "PubMed"
                              , _hd_doi                = Nothing
                              , _hd_url                = Nothing
                              , _hd_uniqId             = Nothing
                              , _hd_uniqIdBdd          = Nothing
                              , _hd_page               = Nothing
                              , _hd_title              = t
                              , _hd_authors            = authors aus
                              , _hd_institutes         = institutes aus
                              , _hd_source             = j
                              , _hd_abstract           = abstract as
                              , _hd_publication_date   = Just $ Text.pack $ show a
                              , _hd_publication_year   = Just $ fromIntegral y
                              , _hd_publication_month  = Just m
                              , _hd_publication_day    = Just d
                              , _hd_publication_hour   = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2      = Just $ (Text.pack . show) l }
  where
    -- Author names joined with ", "; 'Nothing' when the record carries no
    -- author list at all. Authors with neither name part are skipped.
    authors :: Maybe [PubMedDoc.Author] -> Maybe Text
    authors = fmap (Text.intercalate ", " . mapMaybe fullName)

    -- "ForeName LastName" built only from the parts that are present, so a
    -- missing fore name no longer produces a leading space and an author
    -- without any name no longer contributes a lone " " entry.
    fullName :: PubMedDoc.Author -> Maybe Text
    fullName au =
      case catMaybes [PubMedDoc.foreName au, PubMedDoc.lastName au] of
        []    -> Nothing
        parts -> Just (Text.unwords parts)

    -- Affiliations of the authors that have one, joined with ", ". Commas
    -- inside a single affiliation are rewritten to " - " first so they
    -- cannot be confused with the separator between affiliations.
    institutes :: Maybe [PubMedDoc.Author] -> Maybe Text
    institutes = fmap ( Text.intercalate ", "
                      . map (Text.replace ", " " - ")
                      . mapMaybe PubMedDoc.affiliation
                      )

    -- Abstract fragments joined with ", "; 'Nothing' stays 'Nothing'.
    abstract :: Maybe [Text] -> Maybe Text
    abstract = fmap (Text.intercalate ", ")
79