]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
[FIX] Order 2 regression and split of clustering
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / Pubmed.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.Pubmed
3 Description : Pubmed API connection
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Text.Corpus.API.Pubmed
14 where
15
16 import Conduit
17 import Control.Monad.Reader (runReaderT)
18 import Data.Either (Either)
19 import Data.Maybe
20 import Data.Text (Text)
21 import qualified Data.Text as Text
22 import Servant.Client (ClientError)
23
24 import Gargantext.Prelude
25 import Gargantext.Core (Lang(..))
26 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
27
28 import qualified PUBMED as PubMed
29 import qualified PUBMED.Parser as PubMedDoc
30 import PUBMED.Types (Config(..))
31
32
-- | Free-text search expression sent to the PubMed API.
type Query = Text

-- | Upper bound on the number of documents streamed back by 'get'.
type Limit = Integer
35
36
-- | Query the PubMed API and stream the matching documents.
--
-- Runs 'PubMed.getMetadataWithC' with the given (optional) API key and
-- query, leaving the page size unset. On success, the returned conduit is
-- piped through an optional limit and every record is converted with
-- 'toDoc', the language being fixed to 'EN'.
--
-- The first component of the resulting pair is handed back exactly as the
-- PubMed client returned it; it is not adjusted when a limit is supplied.
-- NOTE(review): presumably this is the total hit count -- confirm against
-- the PubMed client.
--
-- TODO put default pubmed query in gargantext.ini
--      by default: 10K docs
get :: Maybe Text   -- ^ optional PubMed API key
    -> Query        -- ^ search query
    -> Maybe Limit  -- ^ maximum number of documents; 'Nothing' means no cap
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get mAPIKey q l = do
  let pubmedConfig = Config { apiKey  = mAPIKey
                            , query   = q
                            , perPage = Nothing }
      -- Without a limit every element is passed through untouched.
      applyLimit = maybe (mapC identity) (takeC . fromIntegral) l
      toDocs (len, docsC) = (len, docsC .| applyLimit .| mapC (toDoc EN))
  eRes <- runReaderT PubMed.getMetadataWithC pubmedConfig
  pure (toDocs <$> eRes)
51 --either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
52 -- <$> PubMed.getMetadataWithC q l
53
-- | Convert a parsed PubMed record into a 'HyperdataDocument'.
--
-- The given 'Lang' is only recorded (via 'show') in '_hd_language_iso2'.
-- The source database is always tagged as @\"PubMed\"@ and the unique id
-- is the shown PubMed identifier. Fields for which the record carries no
-- information (DOI, URL, page, time of day, ...) are left as 'Nothing'.
--
-- Authors, institutes and abstract are each rendered as one
-- comma-separated 'Text'. They are 'Nothing' whenever there is nothing to
-- render, so an empty string is never stored.
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed { pubmed_id
                          , pubmed_article = PubMedDoc.PubMedArticle t j as aus
                          , pubmed_date    = PubMedDoc.PubMedDate a y m d }
        ) = HyperdataDocument { _hd_bdd                = Just "PubMed"
                              , _hd_doi                = Nothing
                              , _hd_url                = Nothing
                              , _hd_uniqId             = Just $ Text.pack $ show pubmed_id
                              , _hd_uniqIdBdd          = Nothing
                              , _hd_page               = Nothing
                              , _hd_title              = t
                              , _hd_authors            = authors aus
                              , _hd_institutes         = institutes aus
                              , _hd_source             = j
                              , _hd_abstract           = abstract as
                              , _hd_publication_date   = Just $ Text.pack $ show a
                              , _hd_publication_year   = Just $ fromIntegral y
                              , _hd_publication_month  = Just m
                              , _hd_publication_day    = Just d
                              , _hd_publication_hour   = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2      = Just $ (Text.pack . show) l }
  where
    -- Comma-separate the fragments; 'Nothing' when there are none, so that
    -- callers never see @Just \"\"@.
    joinNonEmpty :: [Text] -> Maybe Text
    joinNonEmpty [] = Nothing
    joinNonEmpty ts = Just $ Text.intercalate ", " ts

    -- Full name built only from the parts that are present. Appending
    -- @Just \" \"@ between the two 'Maybe' values (as done previously)
    -- always yields a 'Just', which produced a stray leading/trailing
    -- space, or a lone @\" \"@ for authors without any name.
    fullName :: PubMedDoc.Author -> Maybe Text
    fullName au =
      case catMaybes [PubMedDoc.foreName au, PubMedDoc.lastName au] of
        []    -> Nothing
        parts -> Just $ Text.unwords parts

    authors :: [PubMedDoc.Author] -> Maybe Text
    authors = joinNonEmpty . mapMaybe fullName

    -- Commas inside an affiliation are replaced by " - " so they cannot be
    -- confused with the ", " separator used between affiliations.
    institutes :: [PubMedDoc.Author] -> Maybe Text
    institutes = joinNonEmpty
               . map (Text.replace ", " " - ")
               . mapMaybe PubMedDoc.affiliation

    abstract :: [Text] -> Maybe Text
    abstract = joinNonEmpty