]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
[conduit] implement conduit for Hal, Pubmed
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Pubmed.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Pubmed
14 where
15
16 import Conduit
17 import Data.Either (Either)
18 import Data.Maybe
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Servant.Client (ClientError)
22
23 import Gargantext.Prelude
24 import Gargantext.Core (Lang(..))
25 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
26
27 import qualified PUBMED as PubMed
28 import qualified PUBMED.Parser as PubMedDoc
29
30
-- | Search query text; 'get' forwards it unchanged to 'PubMed.getMetadataWithC'.
31 type Query = Text
-- | Alias for the PubMed client's own limit type, so that the signature of
-- 'get' can refer to it without the qualified name.
32 type Limit = PubMed.Limit
33
34
-- | Query the PubMed API and expose the results as a stream of
-- 'HyperdataDocument's.
--
-- The query and the optional limit are handed unchanged to
-- 'PubMed.getMetadataWithC'. On success the first component of the returned
-- pair is passed through untouched and every item of the conduit is converted
-- with @'toDoc' 'EN'@; a 'ClientError' is returned as is.
--
-- TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
get :: Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get q l = do
  response <- PubMed.getMetadataWithC q l
  pure (asDocuments <$> response)
  where
    -- Only the stream is rewritten: each parsed PubMed record becomes a
    -- document tagged as English.
    asDocuments (total, records) = (total, records .| mapC (toDoc EN))
41 --either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
42 -- <$> PubMed.getMetadataWithC q l
43
-- | Convert a parsed PubMed record into a 'HyperdataDocument'.
--
-- The source database is always tagged @"PubMed"@ and the language field is
-- the 'show' rendering of the given 'Lang'. Title and source are copied from
-- the article as they are. DOI, URL, unique identifiers, page and the
-- hour\/minute\/second fields are left as 'Nothing'.
--
-- Authors, institutes and abstract are each collapsed into a single
-- comma-separated 'Text', or 'Nothing' when there is nothing to show.
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
                          (PubMedDoc.PubMedDate a y m d)
        ) = HyperdataDocument { _hd_bdd                = Just "PubMed"
                              , _hd_doi                = Nothing
                              , _hd_url                = Nothing
                              , _hd_uniqId             = Nothing
                              , _hd_uniqIdBdd          = Nothing
                              , _hd_page               = Nothing
                              , _hd_title              = t
                              , _hd_authors            = authors aus
                              , _hd_institutes         = institutes aus
                              , _hd_source             = j
                              , _hd_abstract           = abstract as
                              , _hd_publication_date   = Just $ Text.pack $ show a
                              , _hd_publication_year   = Just $ fromIntegral y
                              , _hd_publication_month  = Just m
                              , _hd_publication_day    = Just d
                              , _hd_publication_hour   = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2      = Just $ (Text.pack . show) l
                              }
  where
    -- Join the available pieces with ", "; 'Nothing' when no piece is left,
    -- so an empty field is never stored as an empty string.
    joined :: [Text] -> Maybe Text
    joined [] = Nothing
    joined xs = Just $ Text.intercalate ", " xs

    -- "Fore Last" for every author that has at least one name part.
    authors :: [PubMedDoc.Author] -> Maybe Text
    authors = joined . mapMaybe fullName

    -- Built from the parts that are actually present. Appending the 'Maybe'
    -- values around @Just " "@ would always yield a 'Just' (a lone " " for a
    -- nameless author, or a stray leading/trailing space when one part is
    -- missing), so nameless authors could never be filtered out.
    fullName :: PubMedDoc.Author -> Maybe Text
    fullName n = case catMaybes [PubMedDoc.foreName n, PubMedDoc.lastName n] of
      []    -> Nothing
      parts -> Just $ Text.unwords parts

    -- Affiliations of the authors that have one. Commas inside a single
    -- affiliation are rewritten to " - " so that ", " only ever separates
    -- two affiliations in the joined text.
    institutes :: [PubMedDoc.Author] -> Maybe Text
    institutes = joined
               . map (Text.replace ", " " - ")
               . mapMaybe PubMedDoc.affiliation

    -- Abstract fragments joined into one text.
    abstract :: [Text] -> Maybe Text
    abstract = joined
84