]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Admin/Orchestrator.hs
[ngrams] logRef for logging task progress
[gargantext.git] / src / Gargantext / API / Admin / Orchestrator.hs
1 {-|
2 Module : Gargantext.API.Admin.Orchestrator
3 Description : Jobs Orchestrator
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE TypeOperators #-}
14
15 module Gargantext.API.Admin.Orchestrator where
16
17 import Control.Lens hiding (elements)
18 import Data.Aeson
19 import Servant
20 import Servant.Job.Async
21 import Servant.Job.Client
22 import qualified Data.ByteString.Lazy.Char8 as LBS
23
24 import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
25 import Gargantext.API.Admin.Orchestrator.Types
26 import Gargantext.Prelude
27
28 callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
29 => JobServerURL e Schedule o
30 -> (URL -> Schedule)
31 -> m o
32 callJobScrapy jurl schedule = do
33 progress $ NewTask jurl
34 out <- view job_output <$>
35 retryOnTransientFailure (clientCallbackJob' jurl
36 (fmap (const ()) . scrapySchedule . schedule))
37 progress $ Finished jurl Nothing
38 pure out
39
40 logConsole :: ToJSON a => a -> IO ()
41 logConsole = LBS.putStrLn . encode
42
43 callScraper :: MonadClientJob m => URL -> ScraperInput -> m JobLog
44 callScraper url input =
45 callJobScrapy jurl $ \cb ->
46 Schedule
47 { s_project = "gargantext"
48 , s_spider = input ^. scin_spider
49 , s_setting = []
50 , s_jobid = Nothing
51 , s_version = Nothing
52 , s_extra =
53 [("query", input ^.. scin_query . _Just)
54 ,("user", [input ^. scin_user])
55 ,("corpus", [input ^. scin_corpus . to toUrlPiece])
56 ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
57 ,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
58 ,("url", input ^.. scin_local_file . _Just)
59 ,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
60 ,("callback", [toUrlPiece cb])]
61 }
62 where
63 jurl :: JobServerURL JobLog Schedule JobLog
64 jurl = JobServerURL url Callback
65
66 pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
67 -> (e -> IO ()) -> IO JobLog
68 pipeline scrapyurl client_env input log_status = do
69 e <- runJobMLog client_env log_status $ callScraper scrapyurl input
70 either (panic . cs . show) pure e -- TODO throwError
71
72 -- TODO integrate to ServerT
73 -- use:
74 -- * serveJobsAPI instead of simpleServeJobsAPI
75 -- * JobFunction instead of simpleJobFunction
76 -- TODO:
77 -- * HasSelfUrl or move self_url to settings
78 -- * HasScrapers or move scrapers to settings
79 -- * EnvC env
80 {- NOT USED YET
81 import Data.Text
82 import Servant.Job.Server
83 import Servant.Job.Utils (extendBaseUrl)
84 import Gargantext.API.Admin.Types
85 scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
86 scrapyOrchestrator env = do
87 apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
88 defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
89 (env ^. env_manager) (LogEvent logConsole) $
90 simpleServeJobsAPI (env ^. env_scrapers) .
91 simpleJobFunction . pipeline (URL $ env ^. settings . scrapydUrl)
92 -}