]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Admin/Orchestrator.hs
Merge remote-tracking branch 'origin/dev-openalex' into dev
[gargantext.git] / src / Gargantext / API / Admin / Orchestrator.hs
1 {-|
2 Module : Gargantext.API.Admin.Orchestrator
3 Description : Jobs Orchestrator
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.API.Admin.Orchestrator where
13
14 import Control.Lens hiding (elements)
15 import Data.Aeson
16 import Servant
17 import Servant.Job.Async
18 import Servant.Job.Client
19 import qualified Data.ByteString.Lazy.Char8 as LBS
20
21 import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
22 import Gargantext.API.Admin.Orchestrator.Types
23 import Gargantext.Prelude
24
25 callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
26 => JobServerURL e Schedule o
27 -> (URL -> Schedule)
28 -> m o
29 callJobScrapy jurl schedule = do
30 progress $ NewTask jurl
31 out <- view job_output <$>
32 retryOnTransientFailure (clientCallbackJob' jurl
33 (fmap (const ()) . scrapySchedule . schedule))
34 progress $ Finished jurl Nothing
35 pure out
36
37 logConsole :: ToJSON a => a -> IO ()
38 logConsole = LBS.putStrLn . encode
39
40 callScraper :: MonadClientJob m => URL -> ScraperInput -> m JobLog
41 callScraper url input =
42 callJobScrapy jurl $ \cb ->
43 Schedule
44 { s_project = "gargantext"
45 , s_spider = input ^. scin_spider
46 , s_setting = []
47 , s_jobid = Nothing
48 , s_version = Nothing
49 , s_extra =
50 [("query", input ^.. scin_query . _Just)
51 ,("user", [input ^. scin_user])
52 ,("corpus", [input ^. scin_corpus . to toUrlPiece])
53 ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
54 ,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
55 ,("url", input ^.. scin_local_file . _Just)
56 ,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
57 ,("callback", [toUrlPiece cb])]
58 }
59 where
60 jurl :: JobServerURL JobLog Schedule JobLog
61 jurl = JobServerURL url Callback
62
63 pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
64 -> (e -> IO ()) -> IO JobLog
65 pipeline scrapyurl client_env input log_status = do
66 e <- runJobMLog client_env log_status $ callScraper scrapyurl input
67 either (panic . cs . show) pure e -- TODO throwError
68
69 -- TODO integrate to ServerT
70 -- use:
71 -- * serveJobsAPI instead of simpleServeJobsAPI
72 -- * JobFunction instead of simpleJobFunction
73 -- TODO:
74 -- * HasSelfUrl or move self_url to settings
75 -- * HasScrapers or move scrapers to settings
76 -- * EnvC env
77 {- NOT USED YET
78 import Data.Text
79 import Servant.Job.Server
80 import Servant.Job.Utils (extendBaseUrl)
81 import Gargantext.API.Admin.Types
82 scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
83 scrapyOrchestrator env = do
84 apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
85 defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
86 (env ^. env_manager) (LogEvent logConsole) $
87 simpleServeJobsAPI (env ^. env_scrapers) .
88 simpleJobFunction . pipeline (URL $ env ^. settings . scrapydUrl)
89 -}