]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
Merge remote-tracking branch 'origin/191-dev-list-upload-fixes' into dev-merge
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Pubmed.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Pubmed
14 where
15
16 import Conduit
17 import Control.Monad.Reader (runReaderT)
18 import Data.Either (Either)
19 import Data.Maybe
20 import Data.Text (Text)
21 import qualified Data.Text as Text
22 import Servant.Client (ClientError)
23
24 import Gargantext.Prelude
25 import Gargantext.Core (Lang(..))
26 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
27
28 import qualified PUBMED as PubMed
29 import qualified PUBMED.Parser as PubMedDoc
30 import PUBMED.Types (Config(..))
31
32
-- | Search query text; 'get' passes it on as the @query@ field of the PubMed 'Config'.
33 type Query = Text
-- | Maximum number of documents to keep; 'get' converts it with 'fromIntegral' and hands it to 'takeC'.
34 type Limit = Integer
35
36
-- | Run a PubMed query and expose the results as a stream of
-- 'HyperdataDocument's.
--
-- TODO put default pubmed query in gargantext.ini
-- (original note: "by default: 10K docs"; as written here, a 'Nothing'
-- limit leaves the stream uncapped).
--
-- Arguments, in order: optional API key, the query, and an optional cap on
-- the number of documents taken from the stream.
--
-- On success the result pairs the @Maybe Integer@ returned by
-- 'PubMed.getMetadataWithC' (passed through untouched, i.e. /not/ adjusted
-- to the limit -- NOTE(review): presumably the total hit count, confirm
-- against the PUBMED package) with a conduit yielding documents converted by
-- 'toDoc' and tagged as 'EN'. A 'ClientError' is returned as 'Left'.
get :: Maybe Text
    -> Query
    -> Maybe Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get mAPIKey q l = fmap decorate <$> runReaderT PubMed.getMetadataWithC pubmedConfig
  where
    -- Request settings; documents are fetched 200 per page.
    pubmedConfig = Config { apiKey  = mAPIKey
                          , query   = q
                          , perPage = Just 200
                          , mWebEnv = Nothing }

    -- Pass-through stage when no limit is given, otherwise keep only the
    -- first @l@ documents.
    capStream = maybe (mapC identity) (takeC . fromIntegral) l

    -- Keep the reported count as is; cap and convert the raw document stream.
    decorate (count, rawDocs) = (count, rawDocs .| capStream .| mapC (toDoc EN))

-- Earlier implementation, kept for reference:
--either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
--      <$> PubMed.getMetadataWithC q l
54
-- | Convert a parsed PubMed record into a 'HyperdataDocument'.
--
-- The 'Lang' argument is only used to fill '_hd_language_iso2' (rendered with
-- 'show'). The source database is always recorded as @"PubMed"@ and the
-- PubMed identifier becomes '_hd_uniqId'. DOI, URL, page and the
-- hour/minute/second fields are left as 'Nothing'.
--
-- Authors, institutes and abstract are each collapsed into a single
-- comma-separated 'Text'. Authors and institutes are 'Nothing' when no
-- usable entry remains; the abstract is 'Nothing' only for an empty list.
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed { pubmed_id
                          , pubmed_article = PubMedDoc.PubMedArticle title source abstractParts auths
                          , pubmed_date    = PubMedDoc.PubMedDate date year month day }
        ) = HyperdataDocument { _hd_bdd                = Just "PubMed"
                              , _hd_doi                = Nothing
                              , _hd_url                = Nothing
                              , _hd_uniqId             = Just $ Text.pack $ show pubmed_id
                              , _hd_uniqIdBdd          = Nothing
                              , _hd_page               = Nothing
                              , _hd_title              = title
                              , _hd_authors            = authors auths
                              , _hd_institutes         = institutes auths
                              , _hd_source             = source
                              , _hd_abstract           = abstract abstractParts
                              , _hd_publication_date   = Just $ Text.pack $ show date
                              , _hd_publication_year   = Just $ fromIntegral year
                              , _hd_publication_month  = Just month
                              , _hd_publication_day    = Just day
                              , _hd_publication_hour   = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2      = Just $ (Text.pack . show) l }
  where
    -- Join the non-empty items with ", "; 'Nothing' when nothing is left, so
    -- that "no data" is never reported as @Just ""@.
    joined :: [Text] -> Maybe Text
    joined items = case filter (not . Text.null) items of
      []    -> Nothing
      kept  -> Just $ Text.intercalate ", " kept

    -- "Forename Lastname" built from whichever parts are present.
    -- The previous @foreName n <> Just " " <> lastName n@ went through the
    -- 'Maybe' semigroup, which always yields a 'Just': nameless authors
    -- became " " and a missing part left a stray leading/trailing space.
    fullName :: PubMedDoc.Author -> Text
    fullName n = Text.unwords
               $ filter (not . Text.null)
               $ catMaybes [PubMedDoc.foreName n, PubMedDoc.lastName n]

    authors :: [PubMedDoc.Author] -> Maybe Text
    authors = joined . map fullName

    -- Commas inside one affiliation are rewritten to " - " so that ", "
    -- only ever separates distinct affiliations in the joined text.
    institutes :: [PubMedDoc.Author] -> Maybe Text
    institutes = joined
               . map (Text.replace ", " " - ")
               . mapMaybe PubMedDoc.affiliation

    abstract :: [Text] -> Maybe Text
    abstract []  = Nothing
    abstract as' = Just $ Text.intercalate ", " as'