2 Module : Gargantext.API.Admin.Orchestrator
3 Description : Jobs Orchestrator
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE TypeOperators #-}
15 module Gargantext.API.Admin.Orchestrator where
17 import Control.Lens hiding (elements)
21 import Servant.Job.Async
22 import Servant.Job.Client
23 import Servant.Job.Server
24 import Servant.Job.Utils (extendBaseUrl)
25 import qualified Data.ByteString.Lazy.Char8 as LBS
27 import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
28 import Gargantext.API.Admin.Orchestrator.Types
29 import Gargantext.API.Admin.Types
30 import Gargantext.Prelude
32 callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
33 => JobServerURL e Schedule o
36 callJobScrapy jurl schedule = do
37 progress $ NewTask jurl
38 out <- view job_output <$>
39 retryOnTransientFailure (clientCallbackJob' jurl
40 (fmap (const ()) . scrapySchedule . schedule))
41 progress $ Finished jurl Nothing
44 logConsole :: ToJSON a => a -> IO ()
45 logConsole = LBS.putStrLn . encode
47 callScraper :: MonadClientJob m => URL -> ScraperInput -> m JobLog
48 callScraper url input =
49 callJobScrapy jurl $ \cb ->
51 { s_project = "gargantext"
52 , s_spider = input ^. scin_spider
57 [("query", input ^.. scin_query . _Just)
58 ,("user", [input ^. scin_user])
59 ,("corpus", [input ^. scin_corpus . to toUrlPiece])
60 ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
61 ,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
62 ,("url", input ^.. scin_local_file . _Just)
63 ,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
64 ,("callback", [toUrlPiece cb])]
67 jurl :: JobServerURL JobLog Schedule JobLog
68 jurl = JobServerURL url Callback
70 pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
71 -> (e -> IO ()) -> IO JobLog
72 pipeline scrapyurl client_env input log_status = do
73 e <- runJobMLog client_env log_status $ callScraper scrapyurl input
74 either (panic . cs . show) pure e -- TODO throwError
76 -- TODO integrate to ServerT
78 -- * serveJobsAPI instead of simpleServeJobsAPI
79 -- * JobFunction instead of simpleJobFunction
80 scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
81 scrapyOrchestrator env = do
82 apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
83 defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
84 (env ^. env_manager) (LogEvent logConsole) $
85 simpleServeJobsAPI (env ^. env_scrapers) .
86 simpleJobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)