2 Module : Gargantext.API.Admin.Orchestrator
3 Description : Jobs Orchestrator
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.API.Admin.Orchestrator where
14 import Control.Lens hiding (elements)
17 import Servant.Job.Async
18 import Servant.Job.Client
19 import qualified Data.ByteString.Lazy.Char8 as LBS
21 import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
22 import Gargantext.API.Admin.Orchestrator.Types
23 import Gargantext.Prelude
25 callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
26 => JobServerURL e Schedule o
29 callJobScrapy jurl schedule = do
30 progress $ NewTask jurl
31 out <- view job_output <$>
32 retryOnTransientFailure (clientCallbackJob' jurl
33 (fmap (const ()) . scrapySchedule . schedule))
34 progress $ Finished jurl Nothing
37 logConsole :: ToJSON a => a -> IO ()
38 logConsole = LBS.putStrLn . encode
40 callScraper :: MonadClientJob m => URL -> ScraperInput -> m JobLog
41 callScraper url input =
42 callJobScrapy jurl $ \cb ->
44 { s_project = "gargantext"
45 , s_spider = input ^. scin_spider
50 [("query", input ^.. scin_query . _Just)
51 ,("user", [input ^. scin_user])
52 ,("corpus", [input ^. scin_corpus . to toUrlPiece])
53 ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
54 ,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
55 ,("url", input ^.. scin_local_file . _Just)
56 ,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
57 ,("callback", [toUrlPiece cb])]
60 jurl :: JobServerURL JobLog Schedule JobLog
61 jurl = JobServerURL url Callback
63 pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
64 -> (e -> IO ()) -> IO JobLog
65 pipeline scrapyurl client_env input log_status = do
66 e <- runJobMLog client_env log_status $ callScraper scrapyurl input
67 either (panic . cs . show) pure e -- TODO throwError
69 -- TODO integrate to ServerT
71 -- * serveJobsAPI instead of simpleServeJobsAPI
72 -- * JobFunction instead of simpleJobFunction
74 -- * HasSelfUrl or move self_url to settings
75 -- * HasScrapers or move scrapers to settings
79 import Servant.Job.Server
80 import Servant.Job.Utils (extendBaseUrl)
81 import Gargantext.API.Admin.Types
82 scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
83 scrapyOrchestrator env = do
84 apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
85 defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
86 (env ^. env_manager) (LogEvent logConsole) $
87 simpleServeJobsAPI (env ^. env_scrapers) .
88 simpleJobFunction . pipeline (URL $ env ^. settings . scrapydUrl)