{-|
Module      : Gargantext.API.Admin.Orchestrator
Description : Jobs Orchestrator
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators   #-}

module Gargantext.API.Admin.Orchestrator where

import Control.Lens hiding (elements)
import Data.Aeson
import Data.Text
import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Admin.Settings
import Gargantext.Prelude
import Servant
import Servant.Job.Async
import Servant.Job.Client
import Servant.Job.Server
import Servant.Job.Utils (extendBaseUrl)
import qualified Data.ByteString.Lazy.Char8 as LBS

-- | Run a job on a remote Scrapy job server: report the new task,
-- schedule it (retrying on transient failures, and discarding the
-- scheduler's response), then report completion and return the job
-- output.  See 'callScraper' below for a concrete use.
callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
              => JobServerURL e Schedule o
              -> (URL -> Schedule)
              -> m o
callJobScrapy jurl schedule = do
  progress $ NewTask jurl
  out <- view job_output <$>
           retryOnTransientFailure (clientCallbackJob' jurl
             (fmap (const ()) . scrapySchedule . schedule))
  progress $ Finished jurl Nothing
  pure out

-- | Log a value to stdout as a single line of JSON.
logConsole :: ToJSON a => a -> IO ()
logConsole = LBS.putStrLn . encode
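
-- A quick doctest-style check (the output follows aeson's encoding):
--
-- >>> logConsole ("done" :: Text)
-- "done"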

-- | Schedule one scrape on the Scrapyd server at @url@, translating the
-- 'ScraperInput' into the 'Schedule' form Scrapyd expects.  Optional
-- fields are included in @s_extra@ only when set ('_Just'); the callback
-- URL @cb@ lets the job server report progress back to us.
callScraper :: MonadClientJob m => URL -> ScraperInput -> m JobLog
callScraper url input =
  callJobScrapy jurl $ \cb ->
    Schedule
      { s_project = "gargantext"
      , s_spider  = input ^. scin_spider
      , s_setting = []
      , s_jobid   = Nothing
      , s_version = Nothing
      , s_extra   =
          [("query",        input ^.. scin_query . _Just)
          ,("user",         [input ^. scin_user])
          ,("corpus",       [input ^. scin_corpus . to toUrlPiece])
          ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
          ,("limit",        input ^.. scin_limit . _Just . to toUrlPiece)
          ,("url",          input ^.. scin_local_file . _Just)
          ,("count_only",   input ^.. scin_count_only . _Just . to toUrlPiece)
          ,("callback",     [toUrlPiece cb])]
      }
  where
    jurl :: JobServerURL JobLog Schedule JobLog
    jurl = JobServerURL url Callback
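
-- A rough sketch of the HTTP call the 'Schedule' above turns into,
-- assuming a stock Scrapyd instance on its default port 6800 (the host
-- and the bracketed values are illustrative, not taken from this module):
--
-- > POST http://localhost:6800/schedule.json
-- >   project=gargantext&spider=<spider>&user=<user>&corpus=<corpus-id>
-- >     &callback=<callback-url>&...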

-- | Run one scrape end to end: 'runJobMLog' drives 'callScraper' in the
-- given client environment, streaming intermediate events to @log_status@,
-- and the final 'JobLog' is returned.  A client-side error currently
-- panics instead of being rethrown.
pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
         -> (e -> IO ()) -> IO JobLog
pipeline scrapyurl client_env input log_status = do
  e <- runJobMLog client_env log_status $ callScraper scrapyurl input
  either (panic . cs . show) pure e -- TODO throwError
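
-- A minimal usage sketch, not taken from this codebase: @scrapydUrl@,
-- @serverBaseUrl@ and @input@ are assumed to be in scope, the 'ClientEnv'
-- is built with servant-client's 'mkClientEnv', and 'JobLog' is assumed
-- to carry the usual JSON instances:
--
-- > mgr <- newManager defaultManagerSettings
-- > let cenv = mkClientEnv mgr serverBaseUrl
-- > jlog <- pipeline scrapydUrl cenv input (logConsole :: JobLog -> IO ())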

-- TODO: integrate into ServerT
-- use:
-- * serveJobsAPI instead of simpleServeJobsAPI
-- * JobFunction instead of simpleJobFunction

-- | Serve the scraper API with callbacks: each scheduled job runs
-- 'pipeline' against the configured Scrapyd URL, and job events are
-- echoed to the console via 'logConsole'.
scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
scrapyOrchestrator env = do
  apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
    defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
    (env ^. env_manager) (LogEvent logConsole) $
      simpleServeJobsAPI (env ^. env_scrapers) .
      simpleJobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)
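
-- A minimal sketch of serving the orchestrator with Warp ('mkEnv' is
-- hypothetical, the port illustrative; 'serve' comes from Servant and
-- 'run' from Network.Wai.Handler.Warp):
--
-- > main :: IO ()
-- > main = do
-- >   env    <- mkEnv
-- >   server <- scrapyOrchestrator env
-- >   run 8008 $ serve (Proxy :: Proxy (WithCallbacks ScraperAPI)) server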