]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API.hs
[openalex] add support for language filter in queries
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API
3 Description : All crawlers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Text.Corpus.API
13 ( ExternalAPIs(..)
14 , Corpus.RawQuery(..)
15 , Corpus.Limit(..)
16 , GetCorpusError(..)
17 , get
18 , externalAPIs
19 ) where
20
21 import Conduit
22 import Data.Bifunctor
23 import Data.Either (Either(..))
24 import Data.Maybe
25 import qualified Data.Text as T
26 import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
27 import Gargantext.Core (Lang(..))
28 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
29 import Gargantext.Prelude
30 import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
31 import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
32 import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
33 import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
34 import qualified Gargantext.Core.Text.Corpus.API.OpenAlex as OpenAlex
35 import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
36 import qualified Gargantext.Core.Text.Corpus.Query as Corpus
37 import qualified PUBMED.Types as PUBMED
38 import Servant.Client (ClientError)
39
-- | Ways in which fetching a corpus through 'get' can fail.
data GetCorpusError
  = InvalidInputQuery !Corpus.RawQuery !T.Text
    -- ^ The user's input query could not be parsed into something
    -- meaningful. Holds the raw query and a textual error message.
  | ExternalAPIError !ClientError
    -- ^ The external service returned an error.
  deriving (Eq, Show)
46
-- | Main entry point for fetching documents from an external API.
--
-- The raw query is parsed first. When parsing fails, 'InvalidInputQuery' is
-- returned and no backend is called. Otherwise the request is routed to the
-- backend selected by the 'ExternalAPIs' argument:
--
-- * OpenAlex, PubMed and HAL: a 'ClientError' reported by the backend is
--   wrapped in 'ExternalAPIError'.
-- * Arxiv: the backend's result is always wrapped in 'Right'.
-- * IsTex and Isidore: the backend hands back its documents in one go; the
--   reported total is their count and they are streamed with 'yieldMany'.
--
-- On success the result pairs an optional document total with a conduit
-- producing the documents.
get :: ExternalAPIs
    -> Lang
    -> Corpus.RawQuery
    -> Maybe PUBMED.APIKey
    -> Maybe Corpus.Limit
    -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get externalAPI la q mPubmedAPIKey limit = dispatch (Corpus.parseQuery q)
  where
    -- Query and limit as extracted by 'Corpus.getRawQuery' and
    -- 'Corpus.getLimit', for the backends that are called with those forms.
    rawQuery   = Corpus.getRawQuery q
    plainLimit = Corpus.getLimit <$> limit

    -- Re-tag a backend failure as a 'GetCorpusError'.
    wrapClientError action = first ExternalAPIError <$> action

    dispatch (Left err) =
      pure (Left (InvalidInputQuery q (T.pack err)))
    dispatch (Right corpusQuery) =
      case externalAPI of
        OpenAlex ->
          -- The first argument is the e-mail, which is currently never set.
          wrapClientError (OpenAlex.get "" q la limit)
        PubMed ->
          -- A missing API key is passed on as the empty string.
          wrapClientError (PUBMED.get (fromMaybe "" mPubmedAPIKey) corpusQuery limit)
        Arxiv ->
          Right <$> Arxiv.get la corpusQuery limit
        HAL ->
          wrapClientError (HAL.getC la rawQuery plainLimit)
        IsTex ->
          (\docs -> Right (Just (fromIntegral (length docs)), yieldMany docs))
            <$> ISTEX.get la rawQuery plainLimit
        Isidore ->
          (\docs -> Right (Just (fromIntegral (length docs)), yieldMany docs))
            <$> ISIDORE.get la plainLimit (Just rawQuery) Nothing