]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
Merge branch '111-dev-refactor-text-corpus-api-with-conduit-alp' of ssh://gitlab...
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Pubmed.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Pubmed
14 where
15
16 import Conduit
17 import Data.Either (Either)
18 import Data.Maybe
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Servant.Client (ClientError)
22
23 import Gargantext.Prelude
24 import Gargantext.Core (Lang(..))
25 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
26
27 import qualified PUBMED as PubMed
28 import qualified PUBMED.Parser as PubMedDoc
29
30
-- | Free-text search expression handed to the PubMed client
-- (see 'get', which forwards it to 'PubMed.getMetadataWithC').
type Query = Text

-- | Alias for the PubMed client's own limit type, used by 'get' for its
-- optional limit argument.
type Limit = PubMed.Limit
33
34
-- | Query PubMed and expose the results as a stream of 'HyperdataDocument's.
--
-- The query and the optional limit are forwarded unchanged to
-- 'PubMed.getMetadataWithC'. On success the first component of the returned
-- pair is passed through untouched (presumably the number of matching
-- documents -- confirm against the PUBMED client), and every record produced
-- by the conduit is converted with @'toDoc' 'EN'@. A 'ClientError' reported
-- by the client is returned as-is.
--
-- TODO put default pubmed query in gargantext.ini
-- NOTE(review): the original note says "by default: 10K docs"; that default
-- is not visible in this module -- confirm in the PUBMED client.
get :: Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get q l = (withHyperdata <$>) <$> PubMed.getMetadataWithC q l
  where
    -- Keep the first component, convert each streamed record on the fly.
    withHyperdata (total, rawDocs) = (total, rawDocs .| mapC (toDoc EN))
43
-- | Convert a parsed PubMed record into a 'HyperdataDocument'.
--
-- The 'Lang' argument is only recorded (through 'show') in
-- '_hd_language_iso2'; it does not affect how the record is read.
--
-- Field mapping:
--
-- * '_hd_bdd' is always @"PubMed"@ and '_hd_uniqId' is the shown PubMed id.
-- * title and source come straight from the article; the abstract parts,
--   author names and affiliations are each joined with @", "@.
-- * the publication date is stored both as shown text and as separate
--   year / month / day components; hour, minute and second are never set.
--
-- Author names and institutes yield 'Nothing' (rather than blank text) when
-- the record carries no usable value for them.
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed { pubmed_id
                          , pubmed_article = PubMedDoc.PubMedArticle t j as aus
                          , pubmed_date    = PubMedDoc.PubMedDate a y m d }
        ) = HyperdataDocument { _hd_bdd                = Just "PubMed"
                              , _hd_doi                = Nothing
                              , _hd_url                = Nothing
                              , _hd_uniqId             = Just $ Text.pack $ show pubmed_id
                              , _hd_uniqIdBdd          = Nothing
                              , _hd_page               = Nothing
                              , _hd_title              = t
                              , _hd_authors            = authors aus
                              , _hd_institutes         = institutes aus
                              , _hd_source             = j
                              , _hd_abstract           = abstract as
                              , _hd_publication_date   = Just $ Text.pack $ show a
                              , _hd_publication_year   = Just $ fromIntegral y
                              , _hd_publication_month  = Just m
                              , _hd_publication_day    = Just d
                              , _hd_publication_hour   = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2      = Just $ (Text.pack . show) l }
  where
    -- Comma-separated list of author names; 'Nothing' when no author has
    -- any name component.
    authors :: [PubMedDoc.Author] -> Maybe Text
    authors au = case mapMaybe fullName au of
      []    -> Nothing
      names -> Just $ Text.intercalate ", " names

    -- "ForeName LastName", built only from the parts that are present.
    -- Combining the parts with @<> Just " " <>@ would always produce a
    -- 'Just' (the 'Maybe' semigroup treats 'Nothing' as identity), giving
    -- " Smith", "John " or a lone " " for incomplete authors.
    fullName :: PubMedDoc.Author -> Maybe Text
    fullName n = case catMaybes [PubMedDoc.foreName n, PubMedDoc.lastName n] of
      []    -> Nothing
      parts -> Just $ Text.unwords parts

    -- Comma-separated list of affiliations. Commas inside one affiliation
    -- are rewritten to " - " so they cannot be confused with the separator.
    -- 'Nothing' when no author carries an affiliation (instead of Just "").
    institutes :: [PubMedDoc.Author] -> Maybe Text
    institutes au = case mapMaybe PubMedDoc.affiliation au of
      []     -> Nothing
      affils -> Just $ Text.intercalate ", "
                     $ map (Text.replace ", " " - ") affils

    -- Abstract parts joined with ", "; 'Nothing' when there are none.
    abstract :: [Text] -> Maybe Text
    abstract []  = Nothing
    abstract as' = Just $ Text.intercalate ", " as'
85