]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
WIP - Add Gargantext.Core.Text.Corpus.Query
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Pubmed.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Pubmed
14 ( get )
15 where
16
17 import Conduit
18 import Control.Monad.Reader (runReaderT)
19 import Data.Either (Either)
20 import Data.Maybe
21 import Data.Text (Text)
22 import qualified Data.Text as Text
23 import Servant.Client (ClientError)
24
25 import Gargantext.Prelude
26 import Gargantext.Core (Lang(..))
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
28
29 import qualified PUBMED as PubMed
30 import qualified PUBMED.Parser as PubMedDoc
31 import PUBMED.Types (Config(..))
32
33
-- | Search query; 'get' forwards it unchanged as the @query@ field of the
-- PubMed client 'Config'.
type Query = Text
-- | Maximum number of documents 'get' lets through its result stream.
type Limit = Int
36
37
-- | Query PubMed and stream the matching records as 'HyperdataDocument's.
--
-- The request is delegated to 'PubMed.getMetadataWithC', run with a 'Config'
-- built from the given API key and query, a page size of 200 and no web
-- environment.
--
-- The result of that call is handed back with only the stream transformed:
--
-- * a 'Left' 'ClientError' is returned as is;
-- * on 'Right', the first component (bound as @len@; presumably the number of
--   hits reported by PubMed -- TODO confirm against the client library) is
--   passed through unchanged, and every record of the stream is converted
--   with @'toDoc' 'EN'@.
--
-- No default limit is applied here: with 'Nothing' the whole stream is passed
-- through, with @'Just' n@ it is cut by @'takeC' n@.
--
-- TODO put default pubmed query in gargantext.ini (the original note mentions
-- a 10K-document default; nothing in this function enforces one).
get :: Text         -- ^ PubMed API key
    -> Query        -- ^ search query
    -> Maybe Limit  -- ^ optional cap on the number of documents streamed
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get apiKey q l = do
  eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey  = Just apiKey
                                                     , query   = q
                                                     , perPage = Just 200
                                                     , mWebEnv = Nothing })
  -- 'Limit' is already an 'Int', which is what 'takeC' expects, so no numeric
  -- conversion is needed. Without a limit the stream is forwarded untouched.
  let takeLimit = maybe (mapC identity) takeC l
      -- Leave the first component alone; cap and convert the document stream.
      limitAndConvert (len, docsC) = (len, docsC .| takeLimit .| mapC (toDoc EN))
  pure $ limitAndConvert <$> eRes
55
-- | Convert a parsed PubMed record into a 'HyperdataDocument'.
--
-- The 'Lang' argument is only used to fill '_hd_language_iso2' (through
-- 'show'); it does not influence any other field.
--
-- Field mapping, as far as this function is concerned:
--
-- * '_hd_bdd' is always @"PubMed"@ and '_hd_uniqId' is the shown PubMed id;
-- * title and source are copied from the article as they are;
-- * authors, institutes and abstract are each joined with @", "@ and are
--   'Nothing' when there is nothing to join;
-- * the publication date is the shown date value, alongside its year, month
--   and day components; hour, minute and second are left unset.
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed { pubmed_id
                          , pubmed_article = PubMedDoc.PubMedArticle t j as aus
                          , pubmed_date    = PubMedDoc.PubMedDate a y m d }
        ) = HyperdataDocument { _hd_bdd                = Just "PubMed"
                              , _hd_doi                = Nothing
                              , _hd_url                = Nothing
                              , _hd_uniqId             = Just $ Text.pack $ show pubmed_id
                              , _hd_uniqIdBdd          = Nothing
                              , _hd_page               = Nothing
                              , _hd_title              = t
                              , _hd_authors            = authors aus
                              , _hd_institutes         = institutes aus
                              , _hd_source             = j
                              , _hd_abstract           = abstract as
                              , _hd_publication_date   = Just $ Text.pack $ show a
                              , _hd_publication_year   = Just $ fromIntegral y
                              , _hd_publication_month  = Just m
                              , _hd_publication_day    = Just d
                              , _hd_publication_hour   = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2      = Just $ (Text.pack . show) l }
  where
    -- Join the pieces with ", "; 'Nothing' when there is nothing to join, so
    -- that callers never receive @Just ""@.
    joinNonEmpty :: [Text] -> Maybe Text
    joinNonEmpty [] = Nothing
    joinNonEmpty ts = Just $ Text.intercalate ", " ts

    -- "Forename Lastname" built from whichever parts are present.
    --
    -- The parts are collected with 'catMaybes' instead of being combined with
    -- '<>' on 'Maybe': in that semigroup 'Nothing' is the identity, so
    -- @foreName <> Just " " <> lastName@ is always a 'Just' and yields
    -- " Lastname", "Forename " or " " for incomplete authors.
    fullName :: PubMedDoc.Author -> Maybe Text
    fullName au =
      let name = Text.strip
               $ Text.unwords
               $ catMaybes [PubMedDoc.foreName au, PubMedDoc.lastName au]
      in if Text.null name then Nothing else Just name

    -- Authors without any usable name are skipped.
    authors :: [PubMedDoc.Author] -> Maybe Text
    authors = joinNonEmpty . mapMaybe fullName

    -- Affiliations are joined with ", ", so a ", " inside a single affiliation
    -- is rewritten to " - " first to keep the entries distinguishable.
    institutes :: [PubMedDoc.Author] -> Maybe Text
    institutes = joinNonEmpty
               . map (Text.replace ", " " - ")
               . mapMaybe PubMedDoc.affiliation

    abstract :: [Text] -> Maybe Text
    abstract = joinNonEmpty