{-|
Module      : Gargantext.API.Admin.Orchestrator
Description : Jobs Orchestrator
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE DataKinds         #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric     #-}
{-# LANGUAGE RankNTypes        #-}
{-# LANGUAGE TemplateHaskell   #-}
{-# LANGUAGE TypeOperators     #-}

module Gargantext.API.Admin.Orchestrator where

import Gargantext.Prelude
import Gargantext.API.Admin.Settings
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
import Control.Lens hiding (elements)
import Data.Aeson
import qualified Data.ByteString.Lazy.Char8 as LBS
import Servant
import Servant.Job.Async
import Servant.Job.Client
import Servant.Job.Server
import Servant.Job.Utils (extendBaseUrl)

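-- | Schedule a job on a scrapyd server through the servant-job client and
--   wait for its output: report a 'NewTask' event, run the callback-based
--   call (retrying on transient failures), then report 'Finished'.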
callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
              => JobServerURL e Schedule o
              -> (URL -> Schedule)
              -> m o
callJobScrapy jurl schedule = do
  progress $ NewTask jurl
  out <- view job_output <$>
           retryOnTransientFailure (clientCallbackJob' jurl
             (fmap (const ()) . scrapySchedule . schedule))
  progress $ Finished jurl Nothing
  pure out

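-- | Log any JSON-encodable value to stdout, one encoded value per line.
--
-- >>> logConsole (42 :: Int)
-- 42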
logConsole :: ToJSON a => a -> IO ()
logConsole = LBS.putStrLn . encode

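-- | Build a scrapyd 'Schedule' for the @gargantext@ project from a
--   'ScraperInput' and submit it via 'callJobScrapy'. Optional inputs are
--   added to @s_extra@ only when present ('_Just'); @cb@ is the callback
--   URL supplied by the job server.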
callScraper :: MonadClientJob m => URL -> ScraperInput -> m ScraperStatus
callScraper url input =
  callJobScrapy jurl $ \cb ->
    Schedule
      { s_project = "gargantext"
      , s_spider  = input ^. scin_spider
      , s_setting = []
      , s_jobid   = Nothing
      , s_version = Nothing
      , s_extra   =
          [("query",        input ^.. scin_query . _Just)
          ,("user",         [input ^. scin_user])
          ,("corpus",       [input ^. scin_corpus . to toUrlPiece])
          ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
          ,("limit",        input ^.. scin_limit . _Just . to toUrlPiece)
          ,("url",          input ^.. scin_local_file . _Just)
          ,("count_only",   input ^.. scin_count_only . _Just . to toUrlPiece)
          ,("callback",     [toUrlPiece cb])]
      }
  where
    jurl :: JobServerURL ScraperStatus Schedule ScraperStatus
    jurl = JobServerURL url Callback

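-- | Run a complete scraping job against @scrapyurl@, forwarding job events
--   to @log_status@. A client-side failure is currently fatal ('panic')
--   rather than rethrown in the serving monad (see the inline TODO).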
pipeline :: FromJSON e
         => URL -> ClientEnv -> ScraperInput
         -> (e -> IO ()) -> IO ScraperStatus
pipeline scrapyurl client_env input log_status = do
  e <- runJobMLog client_env log_status $ callScraper scrapyurl input
  either (panic . cs . show) pure e -- TODO throwError

-- TODO: integrate into ServerT, using:
-- * serveJobsAPI instead of simpleServeJobsAPI
-- * JobFunction  instead of simpleJobFunction
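-- | Serve the 'ScraperAPI' (with callbacks), backed by the scrapyd instance
--   configured in the environment settings.
--
--   A minimal sketch of the ServerT integration mentioned above, assuming
--   'serveJobsAPI' and 'JobFunction' compose the same way as their simple
--   counterparts below (unverified, for illustration only):
--
-- > serveJobsAPI (env ^. env_scrapers) .
-- >   JobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)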
scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
scrapyOrchestrator env =
  apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
    defaultSettings (extendBaseUrl ("scraper" :: String) $ env ^. env_self_url)
    (env ^. env_manager) (LogEvent logConsole) $
    simpleServeJobsAPI (env ^. env_scrapers) .
      simpleJobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)