2 Module : Gargantext.API.Admin.Orchestrator
3 Description : Jobs Orchestrator
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE TypeOperators #-}
15 module Gargantext.API.Admin.Orchestrator where
17 import Control.Lens hiding (elements)
19 import qualified Data.ByteString.Lazy.Char8 as LBS
22 import Servant.Job.Async
23 import Servant.Job.Client
24 import Servant.Job.Server
25 import Servant.Job.Utils (extendBaseUrl)
27 import Gargantext.Prelude
28 import Gargantext.API.Admin.Settings
29 import Gargantext.API.Admin.Orchestrator.Types
30 import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
32 callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
33 => JobServerURL e Schedule o
36 callJobScrapy jurl schedule = do
37 progress $ NewTask jurl
38 out <- view job_output <$>
39 retryOnTransientFailure (clientCallbackJob' jurl
40 (fmap (const ()) . scrapySchedule . schedule))
41 progress $ Finished jurl Nothing
44 logConsole :: ToJSON a => a -> IO ()
45 logConsole = LBS.putStrLn . encode
47 callScraper :: MonadClientJob m => URL -> ScraperInput -> m ScraperStatus
48 callScraper url input =
49 callJobScrapy jurl $ \cb ->
51 { s_project = "gargantext"
52 , s_spider = input ^. scin_spider
57 [("query", input ^.. scin_query . _Just)
58 ,("user", [input ^. scin_user])
59 ,("corpus", [input ^. scin_corpus . to toUrlPiece])
60 ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
61 ,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
62 ,("url", input ^.. scin_local_file . _Just)
63 ,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
64 ,("callback", [toUrlPiece cb])]
67 jurl :: JobServerURL ScraperStatus Schedule ScraperStatus
68 jurl = JobServerURL url Callback
70 pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
71 -> (e -> IO ()) -> IO ScraperStatus
72 pipeline scrapyurl client_env input log_status = do
73 e <- runJobMLog client_env log_status $ callScraper scrapyurl input
74 either (panic . cs . show) pure e -- TODO throwError
76 -- TODO integrate to ServerT
78 -- * serveJobsAPI instead of simpleServeJobsAPI
79 -- * JobFunction instead of simpleJobFunction
80 scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
81 scrapyOrchestrator env = do
82 apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
83 defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
84 (env ^. env_manager) (LogEvent logConsole) $
85 simpleServeJobsAPI (env ^. env_scrapers) .
86 simpleJobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)