]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API.hs
[scrapers] fix limit with MAX_DOCS_SCRAPERS
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API
3 Description : All crawlers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Text.Corpus.API
13 ( ExternalAPIs(..)
14 , Query
15 , Limit
16 , get
17 , externalAPIs
18 )
19 where
20
import Conduit
import Data.Either (Either(..))
import Data.Maybe
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import qualified GHC.Err as Err (error)
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
import Servant.Client (ClientError)
33
-- | Main entry point for fetching document metadata from an external API.
--
-- Dispatches on the 'ExternalAPIs' constructor to the matching crawler
-- module. A successful result pairs an optional document count with a
-- conduit that yields the documents.
--
-- * 'PubMed' ignores the language argument and delegates to @PUBMED.get@.
-- * 'HAL' delegates to @HAL.getC@.
-- * 'IsTex' and 'Isidore' first bind the whole result of their crawler,
--   report its 'length' as the count and stream it with 'yieldMany'.
--   These two branches always produce a 'Right'.
-- * Any other constructor is not supported: the call fails with an
--   explicit error message instead of a bare @undefined@.
get :: ExternalAPIs
    -> Lang
    -> Query
    -> Maybe Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get PubMed _la q limit = PUBMED.get q limit
get HAL la q limit = HAL.getC la q limit
get IsTex la q limit = do
  docs <- ISTEX.get la q limit
  pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
get Isidore la q limit = do
  -- The limit is converted with 'fromIntegral' to the numeric type expected
  -- by @ISIDORE.get@; the trailing 'Nothing' leaves its last optional
  -- argument unset.
  docs <- ISIDORE.get la (fromIntegral <$> limit) (Just q) Nothing
  pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
-- Remaining constructors have no crawler wired in here. Fail loudly with a
-- message that names the function rather than an anonymous @undefined@.
get _ _ _ _ =
  Err.error "Gargantext.Core.Text.Corpus.API.get: unsupported external API"
52
-- | Search query passed to 'get'; a readability alias for @PUBMED.Query@.
type Query = PUBMED.Query

-- | Bound on the number of documents requested through 'get'; a readability
-- alias for @PUBMED.Limit@.
type Limit = PUBMED.Limit