2 Module : Gargantext.Core.Text.Corpus.API
3 Description : All crawlers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Text.Corpus.API
22 import Control.Lens ((^.))
24 import Data.Either (Either(..))
26 import qualified Data.Text as T
27 import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
28 import Gargantext.Core (Lang(..))
29 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
30 import Gargantext.Prelude
31 import Gargantext.Prelude.Config (GargConfig, gc_pubmed_api_key)
32 import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
33 import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
34 import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
35 import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
36 import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
37 import qualified Gargantext.Core.Text.Corpus.Query as Corpus
38 import Servant.Client (ClientError)
41 = -- | We couldn't parse the user input query into something meaningful.
42 InvalidInputQuery !Corpus.RawQuery !T.Text
43 -- | The external service returned an error.
44 | ExternalAPIError !ClientError
-- | Main entry point: fetch document metadata from the selected external API.
53 -- -> IO [HyperdataDocument]
54 -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
-- Dispatch a raw user query to the selected external API.
--
-- The query is parsed first. A parse failure is returned as
-- 'InvalidInputQuery' (carrying the raw query and the parser message) and no
-- external service is called. Otherwise the request goes to the backend
-- chosen by @externalAPI@:
--
--   * PubMed and HAL: a 'Left' coming back from the backend is wrapped in
--     'ExternalAPIError'.
--   * Arxiv: the only backend that receives the parsed query; its result is
--     always wrapped in 'Right'.
--   * IsTex and Isidore: the backend returns a list of documents, which is
--     counted and then re-exposed as a conduit source.
get cfg externalAPI la q limit =
  case Corpus.parseQuery q of
    Left parseErr ->
      pure (Left (InvalidInputQuery q (T.pack parseErr)))
    Right corpusQuery ->
      case externalAPI of
        -- The language argument is not forwarded to PubMed.
        PubMed ->
          first ExternalAPIError <$> PUBMED.get pubmedKey rawQuery maxDocs
        Arxiv ->
          Right <$> Arxiv.get la corpusQuery maxDocs
        HAL ->
          first ExternalAPIError <$> HAL.getC la rawQuery maxDocs
        IsTex ->
          listToSource (ISTEX.get la rawQuery maxDocs)
        Isidore ->
          listToSource (ISIDORE.get la maxDocs (Just rawQuery) Nothing)
  where
    -- API key read from the configuration; only forced on the PubMed branch.
    pubmedKey = cfg ^. gc_pubmed_api_key

    -- Raw query, handed to every backend except Arxiv.
    rawQuery = Corpus.getRawQuery q

    -- Optional document limit, unwrapped from its newtype.
    maxDocs = Corpus.getLimit <$> limit

    -- Run a list-returning fetch, then pair the number of documents with a
    -- source that yields them.
    listToSource fetchDocs = do
      docs <- fetchDocs
      pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)