2 Module : Gargantext.API.Admin.Orchestrator
3 Description : Jobs Orchestrator
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE TypeOperators #-}
15 module Gargantext.API.Admin.Orchestrator where
17 import Control.Lens hiding (elements)
20 import Servant.Job.Async
21 import Servant.Job.Client
22 import qualified Data.ByteString.Lazy.Char8 as LBS
24 import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
25 import Gargantext.API.Admin.Orchestrator.Types
26 import Gargantext.Prelude
28 callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
29 => JobServerURL e Schedule o
32 callJobScrapy jurl schedule = do
33 progress $ NewTask jurl
34 out <- view job_output <$>
35 retryOnTransientFailure (clientCallbackJob' jurl
36 (fmap (const ()) . scrapySchedule . schedule))
37 progress $ Finished jurl Nothing
40 logConsole :: ToJSON a => a -> IO ()
41 logConsole = LBS.putStrLn . encode
43 callScraper :: MonadClientJob m => URL -> ScraperInput -> m JobLog
44 callScraper url input =
45 callJobScrapy jurl $ \cb ->
47 { s_project = "gargantext"
48 , s_spider = input ^. scin_spider
53 [("query", input ^.. scin_query . _Just)
54 ,("user", [input ^. scin_user])
55 ,("corpus", [input ^. scin_corpus . to toUrlPiece])
56 ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
57 ,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
58 ,("url", input ^.. scin_local_file . _Just)
59 ,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
60 ,("callback", [toUrlPiece cb])]
63 jurl :: JobServerURL JobLog Schedule JobLog
64 jurl = JobServerURL url Callback
66 pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
67 -> (e -> IO ()) -> IO JobLog
68 pipeline scrapyurl client_env input log_status = do
69 e <- runJobMLog client_env log_status $ callScraper scrapyurl input
70 either (panic . cs . show) pure e -- TODO throwError
72 -- TODO integrate to ServerT
74 -- * serveJobsAPI instead of simpleServeJobsAPI
75 -- * JobFunction instead of simpleJobFunction
77 -- * HasSelfUrl or move self_url to settings
78 -- * HasScrapers or move scrapers to settings
82 import Servant.Job.Server
83 import Servant.Job.Utils (extendBaseUrl)
84 import Gargantext.API.Admin.Types
85 scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
86 scrapyOrchestrator env = do
87 apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
88 defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
89 (env ^. env_manager) (LogEvent logConsole) $
90 simpleServeJobsAPI (env ^. env_scrapers) .
91 simpleJobFunction . pipeline (URL $ env ^. settings . scrapydUrl)