{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}

module Gargantext.API.Orchestrator where

import Gargantext.Prelude
import Gargantext.API.Settings
import Gargantext.API.Orchestrator.Types
import Gargantext.API.Orchestrator.Scrapy.Schedule
import Control.Lens hiding (elements)
import Data.Aeson
import qualified Data.ByteString.Lazy.Char8 as LBS
import Servant
import Servant.Job.Async
import Servant.Job.Client
import Servant.Job.Server
import Servant.Job.Utils (extendBaseUrl)

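-- | Schedule a job on a Scrapy server and report its life cycle:
-- a 'NewTask' event is emitted before the call and a 'Finished' event
-- once the job's output is available. The scheduling call itself is
-- retried on transient failures.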
callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
              => JobServerURL e Schedule o
              -> (URL -> Schedule)
              -> m o
callJobScrapy jurl schedule = do
  progress $ NewTask jurl
  out <- view job_output <$>
          retryOnTransientFailure (clientCallbackJob' jurl
            (fmap (const ()) . scrapySchedule . schedule))
  progress $ Finished jurl Nothing
  pure out

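-- | Log any JSON-encodable value to stdout, one JSON document per line.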
logConsole :: ToJSON a => a -> IO ()
logConsole = LBS.putStrLn . encode

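-- | Translate a 'ScraperInput' into a Scrapy 'Schedule' for the
-- @gargantext@ project and submit it through 'callJobScrapy'.
-- Optional inputs ('scin_query', 'scin_report_every', 'scin_limit',
-- 'scin_local_file', 'scin_count_only') are added to @s_extra@ only
-- when present; @cb@ is the callback URL filled in by the job machinery.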
callScraper :: MonadClientJob m => URL -> ScraperInput -> m ScraperStatus
callScraper url input =
  callJobScrapy jurl $ \cb ->
    Schedule
      { s_project = "gargantext"
      , s_spider  = input ^. scin_spider
      , s_setting = []
      , s_jobid   = Nothing
      , s_version = Nothing
      , s_extra   =
          [("query",        input ^.. scin_query . _Just)
          ,("user",         [input ^. scin_user])
          ,("corpus",       [input ^. scin_corpus . to toUrlPiece])
          ,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
          ,("limit",        input ^.. scin_limit . _Just . to toUrlPiece)
          ,("url",          input ^.. scin_local_file . _Just)
          ,("count_only",   input ^.. scin_count_only . _Just . to toUrlPiece)
          ,("callback",     [toUrlPiece cb])]
      }
  where
    jurl :: JobServerURL ScraperStatus Schedule ScraperStatus
    jurl = JobServerURL url Callback

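-- | Run 'callScraper' as a client job against @scrapyurl@, forwarding
-- progress events to @log_status@. Failures currently 'panic' instead
-- of raising a proper Servant error (see the inline TODO).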
pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
         -> (e -> IO ()) -> IO ScraperStatus
pipeline scrapyurl client_env input log_status = do
  e <- runJobMLog client_env log_status $ callScraper scrapyurl input
  either (panic . cs . show) pure e -- TODO: use throwError instead of panic

-- TODO: integrate into ServerT, using:
-- * serveJobsAPI instead of simpleServeJobsAPI
-- * JobFunction  instead of simpleJobFunction
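-- | Serve 'ScraperAPI' together with its callback endpoints, logging
-- job events to the console and dispatching each job through 'pipeline'
-- against the configured scrapyd URL.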
scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
scrapyOrchestrator env = do
  apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
    defaultSettings (extendBaseUrl ("scraper" :: String) $ env ^. env_self_url)
    (env ^. env_manager) (LogEvent logConsole) $
      simpleServeJobsAPI (env ^. env_scrapers) .
        simpleJobFunction . pipeline (URL $ env ^. env_settings . scrapydUrl)
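
-- A minimal usage sketch, not part of this module: assuming an 'Env' is
-- built elsewhere ('newEnv' below is hypothetical), the orchestrator can
-- be mounted as a WAI 'Application' and served with warp:
--
-- > import Network.Wai.Handler.Warp (run)
-- >
-- > main :: IO ()
-- > main = do
-- >   env    <- newEnv               -- hypothetical 'Env' constructor
-- >   server <- scrapyOrchestrator env
-- >   run 8008 $ serve (Proxy :: Proxy (WithCallbacks ScraperAPI)) server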