2 Module : Gargantext.API.Admin.Orchestrator
3 Description : Jobs Orchestrator
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE TypeOperators #-}
15 module Gargantext.API.Admin.Orchestrator where
17 import Control.Lens hiding (elements)
20 import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
21 import Gargantext.API.Admin.Orchestrator.Types
22 import Gargantext.API.Admin.Settings
23 import Gargantext.Prelude
25 import Servant.Job.Async
26 import Servant.Job.Client
27 import Servant.Job.Server
28 import Servant.Job.Utils (extendBaseUrl)
29 import qualified Data.ByteString.Lazy.Char8 as LBS
31 callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
32 => JobServerURL e Schedule o
35 callJobScrapy jurl schedule = do
36 progress $ NewTask jurl
37 out <- view job_output <$>
38 retryOnTransientFailure (clientCallbackJob' jurl
39 (fmap (const ()) . scrapySchedule . schedule))
40 progress $ Finished jurl Nothing
43 logConsole :: ToJSON a => a -> IO ()
44 logConsole = LBS.putStrLn . encode
46 callScraper :: MonadClientJob m => URL -> ScraperInput -> m JobLog
47 callScraper url input =
48 callJobScrapy jurl $ \cb ->
50 { s_project = "gargantext"
51 , s_spider = input ^. scin_spider
56 [("query", input ^.. scin_query . _Just)
57 ,("user", [input ^. scin_user])
58 ,("corpus", [input ^. scin_corpus . to toUrlPiece])
59 ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
60 ,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
61 ,("url", input ^.. scin_local_file . _Just)
62 ,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
63 ,("callback", [toUrlPiece cb])]
66 jurl :: JobServerURL JobLog Schedule JobLog
67 jurl = JobServerURL url Callback
69 pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
70 -> (e -> IO ()) -> IO JobLog
71 pipeline scrapyurl client_env input log_status = do
72 e <- runJobMLog client_env log_status $ callScraper scrapyurl input
73 either (panic . cs . show) pure e -- TODO throwError
75 -- TODO integrate to ServerT
77 -- * serveJobsAPI instead of simpleServeJobsAPI
78 -- * JobFunction instead of simpleJobFunction
79 scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
80 scrapyOrchestrator env = do
81 apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
82 defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
83 (env ^. env_manager) (LogEvent logConsole) $
84 simpleServeJobsAPI (env ^. env_scrapers) .
85 simpleJobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)