1 {-# LANGUAGE DataKinds #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE FlexibleInstances #-}
5 {-# LANGUAGE DeriveGeneric #-}
6 {-# LANGUAGE RankNTypes #-}
7 {-# LANGUAGE TemplateHaskell #-}
8 {-# LANGUAGE TypeOperators #-}
10 module Gargantext.API.Orchestrator where
12 import Gargantext.Prelude
13 import Gargantext.API.Settings
14 import Gargantext.API.Orchestrator.Types
15 import Gargantext.API.Orchestrator.Scrapy.Schedule
16 import Control.Lens hiding (elements)
18 import qualified Data.ByteString.Lazy.Char8 as LBS
20 import Servant.Job.Async
21 import Servant.Job.Client
22 import Servant.Job.Server
23 import Servant.Job.Utils (extendBaseUrl)
25 callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
26 => JobServerURL e Schedule o
29 callJobScrapy jurl schedule = do
30 progress $ NewTask jurl
31 out <- view job_output <$>
32 retryOnTransientFailure (clientCallbackJob' jurl
33 (fmap (const ()) . scrapySchedule . schedule))
34 progress $ Finished jurl Nothing
37 logConsole :: ToJSON a => a -> IO ()
38 logConsole = LBS.putStrLn . encode
40 callScraper :: MonadClientJob m => URL -> ScraperInput -> m ScraperStatus
41 callScraper url input =
42 callJobScrapy jurl $ \cb ->
44 { s_project = "gargantext"
45 , s_spider = input ^. scin_spider
50 [("query", input ^.. scin_query . _Just)
51 ,("user", [input ^. scin_user])
52 ,("corpus", [input ^. scin_corpus . to toUrlPiece])
53 ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
54 ,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
55 ,("url", input ^.. scin_local_file . _Just)
56 ,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
57 ,("callback", [toUrlPiece cb])]
60 jurl :: JobServerURL ScraperStatus Schedule ScraperStatus
61 jurl = JobServerURL url Callback
63 pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
64 -> (e -> IO ()) -> IO ScraperStatus
65 pipeline scrapyurl client_env input log_status = do
66 e <- runJobMLog client_env log_status $ callScraper scrapyurl input
67 either (panic . cs . show) pure e -- TODO throwError
69 -- TODO integrate to ServerT
71 -- * serveJobsAPI instead of simpleServeJobsAPI
72 -- * JobFunction instead of simpleJobFunction
73 scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
74 scrapyOrchestrator env = do
75 apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
76 defaultSettings (extendBaseUrl ("scraper" :: String) $ env ^. env_self_url)
77 (env ^. env_manager) (LogEvent logConsole) $
78 simpleServeJobsAPI (env ^. env_scrapers) .
79 simpleJobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)