]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Admin/Orchestrator.hs
Merge branch 'dev-default-extensions' into dev
[gargantext.git] / src / Gargantext / API / Admin / Orchestrator.hs
1 {-|
2 Module : Gargantext.API.Admin.Orchestrator
3 Description : Jobs Orchestrator
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE TypeOperators #-}
14
15 module Gargantext.API.Admin.Orchestrator where
16
17 import Control.Lens hiding (elements)
18 import Data.Aeson
19 import qualified Data.ByteString.Lazy.Char8 as LBS
20 import Data.Text
21 import Servant
22 import Servant.Job.Async
23 import Servant.Job.Client
24 import Servant.Job.Server
25 import Servant.Job.Utils (extendBaseUrl)
26
27 import Gargantext.Prelude
28 import Gargantext.API.Admin.Settings
29 import Gargantext.API.Admin.Orchestrator.Types
30 import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
31
32 callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
33 => JobServerURL e Schedule o
34 -> (URL -> Schedule)
35 -> m o
36 callJobScrapy jurl schedule = do
37 progress $ NewTask jurl
38 out <- view job_output <$>
39 retryOnTransientFailure (clientCallbackJob' jurl
40 (fmap (const ()) . scrapySchedule . schedule))
41 progress $ Finished jurl Nothing
42 pure out
43
44 logConsole :: ToJSON a => a -> IO ()
45 logConsole = LBS.putStrLn . encode
46
47 callScraper :: MonadClientJob m => URL -> ScraperInput -> m ScraperStatus
48 callScraper url input =
49 callJobScrapy jurl $ \cb ->
50 Schedule
51 { s_project = "gargantext"
52 , s_spider = input ^. scin_spider
53 , s_setting = []
54 , s_jobid = Nothing
55 , s_version = Nothing
56 , s_extra =
57 [("query", input ^.. scin_query . _Just)
58 ,("user", [input ^. scin_user])
59 ,("corpus", [input ^. scin_corpus . to toUrlPiece])
60 ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
61 ,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
62 ,("url", input ^.. scin_local_file . _Just)
63 ,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
64 ,("callback", [toUrlPiece cb])]
65 }
66 where
67 jurl :: JobServerURL ScraperStatus Schedule ScraperStatus
68 jurl = JobServerURL url Callback
69
70 pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
71 -> (e -> IO ()) -> IO ScraperStatus
72 pipeline scrapyurl client_env input log_status = do
73 e <- runJobMLog client_env log_status $ callScraper scrapyurl input
74 either (panic . cs . show) pure e -- TODO throwError
75
76 -- TODO integrate to ServerT
77 -- use:
78 -- * serveJobsAPI instead of simpleServeJobsAPI
79 -- * JobFunction instead of simpleJobFunction
80 scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
81 scrapyOrchestrator env = do
82 apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
83 defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
84 (env ^. env_manager) (LogEvent logConsole) $
85 simpleServeJobsAPI (env ^. env_scrapers) .
86 simpleJobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)