]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
[FIX] Ngrams in list
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Pubmed.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Pubmed
14 where
15
16 import Data.Maybe
17 import Data.Text (Text)
18 import qualified Data.Text as Text
19
20 import Gargantext.Prelude
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
23
24 import qualified PUBMED as PubMed
25 import qualified PUBMED.Parser as PubMedDoc
26
27
-- | Query text; 'get' passes it unchanged to 'PubMed.getMetadataWith'.
28 type Query = Text
-- | Local alias for the PubMed client's 'PubMed.Limit'; 'get' accepts it
-- wrapped in 'Maybe'.
29 type Limit = PubMed.Limit
30
31
-- | Query PubMed and convert every returned record into a
-- 'HyperdataDocument' tagged with language 'EN'.
--
-- The query text and the optional limit are handed unchanged to
-- 'PubMed.getMetadataWith'.
--
-- When the client answers with a 'Left', this action fails with 'panic'
-- /while it runs/. The check is sequenced with '>>=' instead of being
-- mapped over the result, so the failure cannot escape as an unevaluated
-- thunk that only blows up when the caller later inspects the list.
--
-- TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
get :: Query -> Maybe Limit -> IO [HyperdataDocument]
get q l = PubMed.getMetadataWith q l >>= either crawlFailed (pure . map (toDoc EN))
  where
    -- The ": " separator keeps the upstream message from being glued
    -- directly onto the prefix.
    crawlFailed e = panic $ "CRAWL: PubMed: " <> e
37
-- | Convert one parsed PubMed record into a 'HyperdataDocument'.
--
-- * The 'Lang' argument is only used to fill '_hd_language_iso2'
--   (rendered with 'show').
-- * '_hd_bdd' is always @Just "PubMed"@; DOI, URL, unique ids, page and the
--   hour\/minute\/second fields are left as 'Nothing'.
-- * Authors, institutes and abstract sections are each joined with @", "@
--   and are 'Nothing' when there is nothing to join.
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle ttl src secs auths)
                          (PubMedDoc.PubMedDate date yr mon dy)
        ) =
  HyperdataDocument
    { _hd_bdd                = Just "PubMed"
    , _hd_doi                = Nothing
    , _hd_url                = Nothing
    , _hd_uniqId             = Nothing
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = ttl
    , _hd_authors            = authors auths
    , _hd_institutes         = institutes auths
    , _hd_source             = src
    , _hd_abstract           = abstract secs
    , _hd_publication_date   = Just $ Text.pack $ show date
    , _hd_publication_year   = Just $ fromIntegral yr
    , _hd_publication_month  = Just mon
    , _hd_publication_day    = Just dy
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just $ (Text.pack . show) l
    }
  where
    -- Join with ", "; an empty list means the field is absent, not "".
    commaJoined :: [Text] -> Maybe Text
    commaJoined [] = Nothing
    commaJoined xs = Just $ Text.intercalate ", " xs

    -- Authors without any name part are skipped, so they add neither a
    -- lone " " entry nor a dangling separator to the result.
    authors :: [PubMedDoc.Author] -> Maybe Text
    authors = commaJoined . mapMaybe fullName

    -- Fore name and last name separated by one space, using only the parts
    -- that are present (no leading or trailing blank when one is missing).
    fullName :: PubMedDoc.Author -> Maybe Text
    fullName au =
      case catMaybes [PubMedDoc.foreName au, PubMedDoc.lastName au] of
        []    -> Nothing
        parts -> Just $ Text.unwords parts

    -- Commas inside a single affiliation are rewritten to " - " so that the
    -- ", " used between affiliations stays unambiguous.
    institutes :: [PubMedDoc.Author] -> Maybe Text
    institutes = commaJoined
               . map (Text.replace ", " " - ")
               . mapMaybe PubMedDoc.affiliation

    abstract :: [Text] -> Maybe Text
    abstract = commaJoined
78