{-|
Module      : Gargantext.API.Admin.Orchestrator
Description : Jobs Orchestrator
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

{-# LANGUAGE DataKinds         #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric     #-}
{-# LANGUAGE RankNTypes        #-}
{-# LANGUAGE TemplateHaskell   #-}
{-# LANGUAGE TypeOperators     #-}

module Gargantext.API.Admin.Orchestrator where

import Gargantext.Prelude
import Gargantext.API.Admin.Settings
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
import Control.Lens hiding (elements)
import Data.Aeson
import qualified Data.ByteString.Lazy.Char8 as LBS
import Servant
import Servant.Job.Async
import Servant.Job.Client
import Servant.Job.Server
import Servant.Job.Utils (extendBaseUrl)
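
-- | Call a job on a remote job server: announce the new task, schedule it
-- on Scrapy via the given callback-URL-to-'Schedule' builder, retry on
-- transient failures, and return the job's output once it is finished.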
callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
              => JobServerURL e Schedule o
              -> (URL -> Schedule)
              -> m o
callJobScrapy jurl schedule = do
  progress $ NewTask jurl
  out <- view job_output <$>
           retryOnTransientFailure (clientCallbackJob' jurl
             (fmap (const ()) . scrapySchedule . schedule))
  progress $ Finished jurl Nothing
  pure out
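
-- | Encode a value as JSON and print it on stdout, one message per line.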
logConsole :: ToJSON a => a -> IO ()
logConsole = LBS.putStrLn . encode
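
-- | Build a Scrapy 'Schedule' (project @gargantext@) from a 'ScraperInput'
-- and submit it via 'callJobScrapy'; optional inputs (the @_Just@
-- traversals) contribute an extra argument only when present.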
callScraper :: MonadClientJob m => URL -> ScraperInput -> m ScraperStatus
callScraper url input =
  callJobScrapy jurl $ \cb ->
    Schedule
      { s_project = "gargantext"
      , s_spider  = input ^. scin_spider
      , s_setting = []
      , s_jobid   = Nothing
      , s_version = Nothing
      , s_extra   =
          [("query",        input ^.. scin_query . _Just)
          ,("user",         [input ^. scin_user])
          ,("corpus",       [input ^. scin_corpus . to toUrlPiece])
          ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
          ,("limit",        input ^.. scin_limit . _Just . to toUrlPiece)
          ,("url",          input ^.. scin_local_file . _Just)
          ,("count_only",   input ^.. scin_count_only . _Just . to toUrlPiece)
          ,("callback",     [toUrlPiece cb])]
      }
  where
    jurl :: JobServerURL ScraperStatus Schedule ScraperStatus
    jurl = JobServerURL url Callback
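
-- | Run 'callScraper' against the given scrapyd URL inside a logging job
-- client: events are forwarded to @log_status@, and a failure of the whole
-- pipeline currently panics (see the TODO below).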
pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
         -> (e -> IO ()) -> IO ScraperStatus
pipeline scrapyurl client_env input log_status = do
  e <- runJobMLog client_env log_status $ callScraper scrapyurl input
  either (panic . cs . show) pure e -- TODO throwError
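
-- A minimal usage sketch for 'pipeline', assuming http-client and
-- servant-client are in scope; @manager@, the base URLs and @input@ below
-- are hypothetical values, not part of this module:
--
-- > do manager   <- newManager defaultManagerSettings
-- >    selfUrl   <- parseBaseUrl "http://localhost:8008"
-- >    scrapyUrl <- parseBaseUrl "http://localhost:6800"
-- >    st <- pipeline (URL scrapyUrl) (mkClientEnv manager selfUrl) input
-- >                   (logConsole :: ScraperStatus -> IO ())
-- >    logConsole st
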
-- TODO integrate to ServerT
-- * serveJobsAPI instead of simpleServeJobsAPI
-- * JobFunction  instead of simpleJobFunction
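-- | Serve 'ScraperAPI' with callbacks: each incoming job is executed by
-- 'pipeline' against the scrapyd URL taken from the environment settings,
-- logging events to the console as JSON.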
scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
scrapyOrchestrator env = do
  apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
    defaultSettings (extendBaseUrl ("scraper" :: String) $ env ^. env_self_url)
    (env ^. env_manager) (LogEvent logConsole) $
    simpleServeJobsAPI (env ^. env_scrapers) .
    simpleJobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)