]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API.hs
Revert b3fb1a1697d18777af6b401c132c39a5c905e129
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API
3 Description : All crawlers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Text.Corpus.API
13 ( ExternalAPIs(..)
14 , Corpus.RawQuery(..)
15 , Corpus.Limit(..)
16 , GetCorpusError(..)
17 , get
18 , externalAPIs
19 ) where
20
21 import Conduit
22 import Data.Bifunctor
23 import Data.Either (Either(..))
24 import Data.Maybe
25 import qualified Data.Text as T
26 import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
27 import Gargantext.Core (Lang(..))
28 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
29 import Gargantext.Prelude
30 import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
31 import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
32 import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
33 import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
34 import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
35 import qualified Gargantext.Core.Text.Corpus.Query as Corpus
36 import Servant.Client (ClientError)
37
-- | Ways in which 'get' can fail to produce a corpus.
data GetCorpusError
  -- | The user-supplied query could not be parsed. Carries the query as it
  -- was received together with the parser's error message.
  = InvalidInputQuery !Corpus.RawQuery !T.Text
  -- | The client talking to the external service reported an error.
  | ExternalAPIError !ClientError
  deriving (Eq, Show)
44
-- | Main entry point for fetching document metadata from an external API.
--
-- The raw query is parsed first. If parsing fails, no backend is contacted
-- and 'InvalidInputQuery' is returned with the parser's message. Otherwise
-- the call is dispatched on the selected 'ExternalAPIs' value.
--
-- On success the result pairs a @Maybe Integer@ with a conduit that yields
-- the documents. For IsTex and Isidore the integer is the length of the
-- fetched collection; for the other backends it is whatever the backend
-- module returns.
--
-- Only PubMed and Arxiv receive the parsed query. HAL, IsTex and Isidore
-- are handed the raw query text (the parse still acts as a validity gate
-- for them). The 'Lang' argument is not forwarded to PubMed.
--
-- Errors reported by the PubMed and HAL clients are wrapped in
-- 'ExternalAPIError'. Any backend without a dedicated branch ends in a
-- 'panic'.
get :: ExternalAPIs
    -> Lang
    -> Corpus.RawQuery
    -> Maybe Corpus.Limit
    -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get api la q limit =
  case Corpus.parseQuery q of
    Left err -> pure $ Left $ InvalidInputQuery q (T.pack err)
    Right corpusQuery ->
      case api of
        -- A missing API key is passed on as the empty string.
        PubMed { mAPIKey = apiKey } ->
          first ExternalAPIError <$> PUBMED.get (fromMaybe "" apiKey) corpusQuery limit
        Arxiv ->
          Right <$> Arxiv.get la corpusQuery limit
        HAL ->
          first ExternalAPIError <$> HAL.getC la rawQuery rawLimit
        IsTex ->
          fromDocs <$> ISTEX.get la rawQuery rawLimit
        Isidore ->
          fromDocs <$> ISIDORE.get la rawLimit (Just rawQuery) Nothing
        externalApi ->
          panic $ "[G.C.T.Corpus.API] These options are not taken into account: " <> (cs $ show externalApi)
  where
    -- Unwrapped query text and limit, for backends that do not take the
    -- parsed query / 'Corpus.Limit' wrapper.
    rawQuery = Corpus.getRawQuery q
    rawLimit = Corpus.getLimit <$> limit

    -- Turn an already-fetched collection of documents into the
    -- (count, stream) result shape shared by all backends.
    fromDocs docs = Right (Just $ fromIntegral $ length docs, yieldMany docs)
72
73 -- | Some Sugar for the documentation
74 -- type Query = PUBMED.Query
75 -- type Limit = PUBMED.Limit