2 Module : Gargantext.Core.Text.Corpus.API
3 Description : All crawlers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Text.Corpus.API
23 import Data.Either (Either(..))
25 import qualified Data.Text as T
26 import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
27 import Gargantext.Core (Lang(..))
28 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
29 import Gargantext.Prelude
30 import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
31 import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
32 import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
33 import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
34 import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
35 import qualified Gargantext.Core.Text.Corpus.Query as Corpus
36 import Servant.Client (ClientError)
39 = -- | We couldn't parse the user input query into something meaningful.
40 InvalidInputQuery !Corpus.RawQuery !T.Text
41 -- | The external service returned an error.
42 | ExternalAPIError !ClientError
45 -- | Main entry point for fetching document metadata from an external API.
50 -- -> IO [HyperdataDocument]
51 -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
53 case Corpus.parseQuery q of
54 Left err -> pure $ Left $ InvalidInputQuery q (T.pack err)
57 PubMed { mAPIKey = mAPIKey } -> first ExternalAPIError <$>
58 PUBMED.get (fromMaybe "" mAPIKey) corpusQuery limit
59 --docs <- PUBMED.get q default_limit -- EN only by default
60 --pure (Just $ fromIntegral $ length docs, yieldMany docs)
61 Arxiv -> Right <$> Arxiv.get la corpusQuery limit
62 HAL -> first ExternalAPIError <$>
63 HAL.getC la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
65 docs <- ISTEX.get la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
66 pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
68 docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
69 pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
71 panic $ "[G.C.T.Corpus.API] This options are note taken into account: " <> (cs $ show externalApi)
73 -- | Some Sugar for the documentation
74 -- type Query = PUBMED.Query
75 -- type Limit = PUBMED.Limit