]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Admin/Orchestrator.hs
Improve text2ngrams to strip spaces
[gargantext.git] / src / Gargantext / API / Admin / Orchestrator.hs
1 {-|
2 Module : Gargantext.API.Admin.Orchestrator
3 Description : Jobs Orchestrator
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE TypeOperators #-}
14
15 module Gargantext.API.Admin.Orchestrator where
16
17 import Control.Lens hiding (elements)
18 import Data.Aeson
19 import Data.Text
20 import Servant
21 import Servant.Job.Async
22 import Servant.Job.Client
23 import Servant.Job.Server
24 import Servant.Job.Utils (extendBaseUrl)
25 import qualified Data.ByteString.Lazy.Char8 as LBS
26
27 import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
28 import Gargantext.API.Admin.Orchestrator.Types
29 import Gargantext.API.Admin.Types
30 import Gargantext.Prelude
31
32 callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
33 => JobServerURL e Schedule o
34 -> (URL -> Schedule)
35 -> m o
36 callJobScrapy jurl schedule = do
37 progress $ NewTask jurl
38 out <- view job_output <$>
39 retryOnTransientFailure (clientCallbackJob' jurl
40 (fmap (const ()) . scrapySchedule . schedule))
41 progress $ Finished jurl Nothing
42 pure out
43
44 logConsole :: ToJSON a => a -> IO ()
45 logConsole = LBS.putStrLn . encode
46
47 callScraper :: MonadClientJob m => URL -> ScraperInput -> m JobLog
48 callScraper url input =
49 callJobScrapy jurl $ \cb ->
50 Schedule
51 { s_project = "gargantext"
52 , s_spider = input ^. scin_spider
53 , s_setting = []
54 , s_jobid = Nothing
55 , s_version = Nothing
56 , s_extra =
57 [("query", input ^.. scin_query . _Just)
58 ,("user", [input ^. scin_user])
59 ,("corpus", [input ^. scin_corpus . to toUrlPiece])
60 ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
61 ,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
62 ,("url", input ^.. scin_local_file . _Just)
63 ,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
64 ,("callback", [toUrlPiece cb])]
65 }
66 where
67 jurl :: JobServerURL JobLog Schedule JobLog
68 jurl = JobServerURL url Callback
69
70 pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
71 -> (e -> IO ()) -> IO JobLog
72 pipeline scrapyurl client_env input log_status = do
73 e <- runJobMLog client_env log_status $ callScraper scrapyurl input
74 either (panic . cs . show) pure e -- TODO throwError
75
76 -- TODO integrate to ServerT
77 -- use:
78 -- * serveJobsAPI instead of simpleServeJobsAPI
79 -- * JobFunction instead of simpleJobFunction
80 scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
81 scrapyOrchestrator env = do
82 apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
83 defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
84 (env ^. env_manager) (LogEvent logConsole) $
85 simpleServeJobsAPI (env ^. env_scrapers) .
86 simpleJobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)