2 Module : Gargantext.Core.Text.Corpus.API
3 Description : All crawlers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Text.Corpus.API
23 import Data.Either (Either(..))
25 import qualified Data.Text as T
26 import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
27 import Gargantext.Core (Lang(..))
28 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
29 import Gargantext.Prelude
30 import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
31 import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
32 import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
33 import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
34 import qualified Gargantext.Core.Text.Corpus.API.OpenAlex as OpenAlex
35 import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
36 import qualified Gargantext.Core.Text.Corpus.Query as Corpus
37 import qualified PUBMED.Types as PUBMED
38 import Servant.Client (ClientError)
41 = -- | We couldn't parse the user input query into something meaningful.
42 InvalidInputQuery !Corpus.RawQuery !T.Text
43 -- | The external service returned an error.
44 | ExternalAPIError !ClientError
47 -- | Get External API metadata main function
51 -> Maybe PUBMED.APIKey
53 -- -> IO [HyperdataDocument]
54 -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get externalAPI la q mPubmedAPIKey limit =
  -- Parse the raw query first: when parsing fails no external service is
  -- contacted and the parser message is returned inside 'InvalidInputQuery'.
  case Corpus.parseQuery q of
    Left err -> pure $ Left $ InvalidInputQuery q (T.pack err)
    Right corpusQuery -> case externalAPI of
      -- No e-mail is available at this call site, so an empty one is passed.
      OpenAlex -> first ExternalAPIError <$> OpenAlex.get "" q la limit
      -- A missing PubMed API key is forwarded as the empty string.
      PubMed   -> first ExternalAPIError <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) corpusQuery limit
      Arxiv    -> Right <$> Arxiv.get la corpusQuery limit
      -- The remaining back-ends take the unparsed query text and a bare limit.
      HAL      -> first ExternalAPIError <$> HAL.getC la rawQuery rawLimit
      IsTex    -> materialised <$> ISTEX.get la rawQuery rawLimit
      Isidore  -> materialised <$> ISIDORE.get la rawLimit (Just rawQuery) Nothing
  where
    -- Unwrapped forms of the query and the limit, shared by several back-ends.
    rawQuery = Corpus.getRawQuery q
    rawLimit = Corpus.getLimit <$> limit

    -- Turn an already-fetched collection of documents into the common result
    -- shape: its length as the total count, plus a conduit yielding each
    -- document in turn.
    materialised docs = Right (Just $ fromIntegral $ length docs, yieldMany docs)