2 Module : Gargantext.Core.Text.Corpus.API
3 Description : All crawlers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Text.Corpus.API
23 import Data.Either (Either(..))
25 import qualified Data.Text as T
26 import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
27 import Gargantext.Core (Lang(..))
28 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
29 import Gargantext.Prelude
30 import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
31 import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
32 import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
33 import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
34 import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
35 import qualified Gargantext.Core.Text.Corpus.Query as Corpus
36 import qualified PUBMED.Types as PUBMED
37 import Servant.Client (ClientError)
40 = -- | We couldn't parse the user input query into something meaningful.
41 InvalidInputQuery !Corpus.RawQuery !T.Text
42 -- | The external service returned an error.
43 | ExternalAPIError !ClientError
46 -- | Get External API metadata main function
50 -> Maybe PUBMED.APIKey
52 -- -> IO [HyperdataDocument]
53 -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
-- Validate the raw query first, then dispatch on the selected backend.
-- Parsing happens before the backend is inspected, so an unparsable query
-- yields 'InvalidInputQuery' for every backend, including the ones that are
-- only handed the raw query text below.
get externalAPI la q mPubmedAPIKey limit =
  case Corpus.parseQuery q of
    Left parseErr ->
      pure (Left (InvalidInputQuery q (T.pack parseErr)))
    Right parsedQuery ->
      case externalAPI of
        -- PubMed consumes the parsed query; a missing API key is sent as "".
        -- A 'Left' coming back from the client is wrapped in 'ExternalAPIError'.
        PubMed ->
          first ExternalAPIError
            <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) parsedQuery limit
        -- Arxiv also consumes the parsed query; its result is returned as 'Right'.
        Arxiv ->
          Right <$> Arxiv.get la parsedQuery limit
        -- The remaining backends are given the raw query text and the
        -- unwrapped limit instead of the parsed query.
        HAL ->
          first ExternalAPIError <$> HAL.getC la rawQueryText maxDocs
        -- IsTex and Isidore hand back a collection of documents: report its
        -- length as the count and re-emit the elements with 'yieldMany'.
        IsTex -> do
          fetched <- ISTEX.get la rawQueryText maxDocs
          pure (Right (Just (fromIntegral (length fetched)), yieldMany fetched))
        Isidore -> do
          fetched <- ISIDORE.get la maxDocs (Just rawQueryText) Nothing
          pure (Right (Just (fromIntegral (length fetched)), yieldMany fetched))
  where
    -- Raw query text as extracted from the user-supplied query.
    rawQueryText = Corpus.getRawQuery q
    -- Optional limit with its wrapper removed.
    maxDocs = Corpus.getLimit <$> limit