]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API.hs
Merge branch 'dev' into 551-dev-graphql-contexts-ngrams
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API
3 Description : All crawlers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Text.Corpus.API
13 ( ExternalAPIs(..)
14 , Corpus.RawQuery(..)
15 , Corpus.Limit(..)
16 , GetCorpusError(..)
17 , get
18 , externalAPIs
19 ) where
20
21 import Conduit
22 import Control.Lens ((^.))
23 import Data.Bifunctor
24 import Data.Either (Either(..))
25 import Data.Maybe
26 import qualified Data.Text as T
27 import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
28 import Gargantext.Core (Lang(..))
29 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
30 import Gargantext.Prelude
31 import Gargantext.Prelude.Config (GargConfig, gc_pubmed_api_key)
32 import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
33 import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
34 import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
35 import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
36 import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
37 import qualified Gargantext.Core.Text.Corpus.Query as Corpus
38 import Servant.Client (ClientError)
39
-- | Failures that 'get' can report to its caller instead of a document stream.
data GetCorpusError
  = InvalidInputQuery !Corpus.RawQuery !T.Text
    -- ^ The user-supplied query could not be parsed into something
    -- meaningful; carries the offending raw query and the parser's message.
  | ExternalAPIError !ClientError
    -- ^ The external service returned an error.
  deriving (Show, Eq)
46
-- | Main entry point for fetching document metadata from an external API.
--
-- The raw query is first run through 'Corpus.parseQuery'. When parsing
-- fails, 'InvalidInputQuery' is returned and no backend is called. Otherwise
-- the request is dispatched on the selected 'ExternalAPIs' value:
--
-- * PubMed and Arxiv receive the parsed query and the 'Corpus.Limit' as is
--   (the 'Lang' argument is not forwarded to PubMed).
-- * HAL, IsTex and Isidore receive the raw query text and the unwrapped
--   limit.
--
-- Client errors reported by PubMed and HAL are wrapped in 'ExternalAPIError'.
-- On success the result pairs an optional document count with a conduit
-- producing the documents; for IsTex and Isidore the count is the length of
-- the returned list.
get :: GargConfig
    -> ExternalAPIs
    -> Lang
    -> Corpus.RawQuery
    -> Maybe Corpus.Limit
    -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get cfg externalAPI la q limit =
  case Corpus.parseQuery q of
    Left err          -> pure $ Left $ InvalidInputQuery q (T.pack err)
    Right corpusQuery -> dispatch corpusQuery
  where
    -- Plain forms of the query and limit, for backends that do not take the
    -- parsed query / wrapped limit.
    rawQuery = Corpus.getRawQuery q
    rawLimit = Corpus.getLimit <$> limit

    -- Route the request to the selected backend.
    dispatch corpusQuery = case externalAPI of
      PubMed  -> first ExternalAPIError
                   <$> PUBMED.get (cfg ^. gc_pubmed_api_key) corpusQuery limit
      Arxiv   -> Right <$> Arxiv.get la corpusQuery limit
      HAL     -> first ExternalAPIError <$> HAL.getC la rawQuery rawLimit
      IsTex   -> fromDocs <$> ISTEX.get la rawQuery rawLimit
      Isidore -> fromDocs <$> ISIDORE.get la rawLimit (Just rawQuery) Nothing

    -- Package an already-fetched list as (count, stream of documents).
    fromDocs docs = Right (Just $ fromIntegral $ length docs, yieldMany docs)