]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Settings.hs
[SCRAPER] setup the new version of addToCorpus with servant-job
[gargantext.git] / src / Gargantext / API / Settings.hs
1 {-|
2 Module : Gargantext.API.Settings
3 Description : Settings of the API (Server and Client)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 TODO-SECURITY: Critical
11 -}
12
13 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
14
15 {-# LANGUAGE DataKinds #-}
16 {-# LANGUAGE DeriveGeneric #-}
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE FlexibleInstances #-}
19 {-# LANGUAGE MultiParamTypeClasses #-}
20 {-# LANGUAGE NoImplicitPrelude #-}
21 {-# LANGUAGE OverloadedStrings #-}
22 {-# LANGUAGE RankNTypes #-}
23 {-# LANGUAGE ScopedTypeVariables #-}
24 {-# LANGUAGE TemplateHaskell #-}
25
26 module Gargantext.API.Settings
27 where
28
29 import System.Directory
30 import System.Log.FastLogger
31 import GHC.Enum
32 import GHC.Generics (Generic)
33 import Prelude (Bounded(), fail)
34 import System.Environment (lookupEnv)
35 import System.IO (FilePath, hClose)
36 import System.IO.Temp (withTempFile)
37 import System.FileLock (tryLockFile, unlockFile, SharedExclusive(Exclusive))
38 import Database.PostgreSQL.Simple (Connection, connect)
39 import Network.HTTP.Client (Manager)
40 import Network.HTTP.Client.TLS (newTlsManager)
41
42 import Data.Aeson
43 import Data.Maybe (fromMaybe)
44 import Data.Either (either)
45 import Data.Text
46 --import Data.Text.Encoding (encodeUtf8)
47 import Data.ByteString (ByteString)
48 import qualified Data.ByteString.Lazy as L
49
50 import Servant
51 import Servant.Auth.Server (defaultJWTSettings, JWTSettings, CookieSettings, defaultCookieSettings, readKey, writeKey)
52 import Servant.Client (BaseUrl, parseBaseUrl)
53 import qualified Servant.Job.Core
54 import Servant.Job.Async (newJobEnv, defaultSettings, HasJobEnv(..), Job)
55 import Web.HttpApiData (parseUrlPiece)
56
57 import Control.Concurrent
58 import Control.Debounce (mkDebounce, defaultDebounceSettings, debounceFreq, debounceAction)
59 import Control.Exception (finally)
60 import Control.Monad.Logger
61 import Control.Monad.Reader
62 import Control.Lens
63 import Gargantext.Prelude
64 import Gargantext.Database.Utils (databaseParameters, HasConnection(..), Cmd', runCmd)
65 import Gargantext.API.Ngrams (NgramsRepo, HasRepoVar(..), HasRepoSaver(..), HasRepo(..), RepoEnv(..), r_version, saveRepo, initRepo, renv_var, renv_lock)
66 import Gargantext.API.Orchestrator.Types
67
68 type PortNumber = Int
69
70 data SendEmailType = SendEmailViaAws
71 | LogEmailToConsole
72 | WriteEmailToFile
73 deriving (Show, Read, Enum, Bounded, Generic)
74
75
76 data Settings = Settings
77 { _allowedOrigin :: ByteString -- allowed origin for CORS
78 , _allowedHost :: ByteString -- allowed host for CORS
79 , _appPort :: PortNumber
80 , _logLevelLimit :: LogLevel -- log level from the monad-logger package
81 -- , _dbServer :: Text
82 -- ^ this is not used yet
83 , _jwtSettings :: JWTSettings
84 , _cookieSettings :: CookieSettings
85 , _sendLoginEmails :: SendEmailType
86 , _scrapydUrl :: BaseUrl
87 , _fileFolder :: FilePath
88 }
89
90 makeLenses ''Settings
91
92 class HasSettings env where
93 settings :: Getter env Settings
94
95 devSettings :: FilePath -> IO Settings
96 devSettings jwkFile = do
97 jwkExists <- doesFileExist jwkFile
98 when (not jwkExists) $ writeKey jwkFile
99 jwk <- readKey jwkFile
100 pure $ Settings
101 { _allowedOrigin = "http://localhost:8008"
102 , _allowedHost = "localhost:3000"
103 , _appPort = 3000
104 , _logLevelLimit = LevelDebug
105 -- , _dbServer = "localhost"
106 , _sendLoginEmails = LogEmailToConsole
107 , _scrapydUrl = fromMaybe (panic "Invalid scrapy URL") $ parseBaseUrl "http://localhost:6800"
108 , _fileFolder = "data"
109 , _cookieSettings = defaultCookieSettings -- TODO-SECURITY tune
110 , _jwtSettings = defaultJWTSettings jwk -- TODO-SECURITY tune
111 }
112
113
114
115 reqSetting :: FromHttpApiData a => Text -> IO a
116 reqSetting name = do
117 e <- fromMaybe (panic $ "Missing " <> name) <$> lookupEnv (unpack name)
118 pure $ either (panic $ "Unable to parse " <> name) identity $ parseUrlPiece $ pack e
119
120 optSetting :: FromHttpApiData a => Text -> a -> IO a
121 optSetting name d = do
122 me <- lookupEnv (unpack name)
123 case me of
124 Nothing -> pure d
125 Just e -> pure $ either (panic $ "Unable to parse " <> name) identity $ parseUrlPiece $ pack e
126
127 --settingsFromEnvironment :: IO Settings
128 --settingsFromEnvironment =
129 -- Settings <$> (encodeUtf8 <$> reqSetting "ALLOWED_ORIGIN")
130 -- <*> (encodeUtf8 <$> reqSetting "ALLOWED_HOST")
131 -- <*> optSetting "PORT" 3000
132 -- <*> (parseLogLevel <$> optSetting "LOG_LEVEL" "warn")
133 -- <*> reqSetting "DB_SERVER"
134 -- <*> (parseJwk <$> reqSetting "JWT_SECRET")
135 -- <*> optSetting "SEND_EMAIL" SendEmailViaAws
136
137 data FireWall = FireWall { unFireWall :: Bool }
138
139 data Env = Env
140 { _env_settings :: !Settings
141 , _env_logger :: !LoggerSet
142 , _env_conn :: !Connection
143 , _env_repo :: !RepoEnv
144 , _env_manager :: !Manager
145 , _env_self_url :: !BaseUrl
146 , _env_scrapers :: !ScrapersEnv
147 }
148 deriving (Generic)
149
150 makeLenses ''Env
151
152 instance HasConnection Env where
153 connection = env_conn
154
155 instance HasRepoVar Env where
156 repoVar = repoEnv . repoVar
157
158 instance HasRepoSaver Env where
159 repoSaver = repoEnv . repoSaver
160
161 instance HasRepo Env where
162 repoEnv = env_repo
163
164 instance HasSettings Env where
165 settings = env_settings
166
167 instance Servant.Job.Core.HasEnv Env (Job ScraperStatus ScraperStatus) where
168 _env = env_scrapers . Servant.Job.Core._env
169
170 instance HasJobEnv Env ScraperStatus ScraperStatus where
171 job_env = env_scrapers
172
173 data MockEnv = MockEnv
174 { _menv_firewall :: !FireWall
175 }
176 deriving (Generic)
177
178 makeLenses ''MockEnv
179
180 -- | TODO add this path in Settings
181 repoSnapshot :: FilePath
182 repoSnapshot = "repo.json"
183
184 -- | TODO add hard coded file in Settings
185 -- This assumes we own the lock on repoSnapshot.
186 repoSaverAction :: ToJSON a => a -> IO ()
187 repoSaverAction a = do
188 withTempFile "." "tmp-repo.json" $ \fp h -> do
189 -- printDebug "repoSaverAction" fp
190 L.hPut h $ encode a
191 hClose h
192 renameFile fp repoSnapshot
193
194 mkRepoSaver :: MVar NgramsRepo -> IO (IO ())
195 mkRepoSaver repo_var = mkDebounce settings
196 where
197 settings = defaultDebounceSettings
198 { debounceFreq = 1000000 -- 1 second
199 , debounceAction = withMVar repo_var repoSaverAction
200 -- Here this not only `readMVar` but `takeMVar`.
201 -- Namely while repoSaverAction is saving no other change
202 -- can be made to the MVar.
203 -- This might be not efficent and thus reconsidered later.
204 -- However this enables to safely perform a *final* save.
205 -- See `cleanEnv`.
206 -- Future work:
207 -- Add a new MVar just for saving.
208 }
209
210 readRepoEnv :: IO RepoEnv
211 readRepoEnv = do
212 -- Does file exist ? :: Bool
213 repoFile <- doesFileExist repoSnapshot
214
215 -- Is file not empty ? :: Bool
216 repoExists <- if repoFile
217 then (>0) <$> getFileSize repoSnapshot
218 else pure False
219
220 mlock <- tryLockFile repoSnapshot Exclusive
221 lock <- maybe (panic "Repo file already locked") pure mlock
222
223 mvar <- newMVar =<<
224 if repoExists
225 then do
226 e_repo <- eitherDecodeFileStrict repoSnapshot
227 repo <- either fail pure e_repo
228 let archive = repoSnapshot <> ".v" <> show (repo ^. r_version)
229 copyFile repoSnapshot archive
230 pure repo
231 else
232 pure initRepo
233
234 saver <- mkRepoSaver mvar
235 pure $ RepoEnv { _renv_var = mvar, _renv_saver = saver, _renv_lock = lock }
236
237 devJwkFile :: FilePath
238 devJwkFile = "dev.jwk"
239
240 newEnv :: PortNumber -> FilePath -> IO Env
241 newEnv port file = do
242 manager <- newTlsManager
243 settings <- devSettings devJwkFile <&> appPort .~ port -- TODO read from 'file'
244 when (port /= settings ^. appPort) $
245 panic "TODO: conflicting settings of port"
246
247 self_url <- parseBaseUrl $ "http://0.0.0.0:" <> show port
248 param <- databaseParameters file
249 conn <- connect param
250 repo <- readRepoEnv
251 scrapers_env <- newJobEnv defaultSettings manager
252 logger <- newStderrLoggerSet defaultBufSize
253
254 pure $ Env
255 { _env_settings = settings
256 , _env_logger = logger
257 , _env_conn = conn
258 , _env_repo = repo
259 , _env_manager = manager
260 , _env_scrapers = scrapers_env
261 , _env_self_url = self_url
262 }
263
264 data DevEnv = DevEnv
265 { _dev_env_conn :: !Connection
266 , _dev_env_repo :: !RepoEnv
267 , _dev_env_settings :: !Settings
268 }
269
270 makeLenses ''DevEnv
271
272 instance HasConnection DevEnv where
273 connection = dev_env_conn
274
275 instance HasRepoVar DevEnv where
276 repoVar = repoEnv . repoVar
277
278 instance HasRepoSaver DevEnv where
279 repoSaver = repoEnv . repoSaver
280
281 instance HasRepo DevEnv where
282 repoEnv = dev_env_repo
283
284 instance HasSettings DevEnv where
285 settings = dev_env_settings
286
287 cleanEnv :: HasRepo env => env -> IO ()
288 cleanEnv env = do
289 r <- takeMVar (env ^. repoEnv . renv_var)
290 repoSaverAction r
291 unlockFile (env ^. repoEnv . renv_lock)
292
293 withDevEnv :: FilePath -> (DevEnv -> IO a) -> IO a
294 withDevEnv iniPath k = do
295 env <- newDevEnv
296 k env `finally` cleanEnv env
297
298 where
299 newDevEnv = do
300 param <- databaseParameters iniPath
301 conn <- connect param
302 repo <- readRepoEnv
303 setts <- devSettings devJwkFile
304 pure $ DevEnv
305 { _dev_env_conn = conn
306 , _dev_env_repo = repo
307 , _dev_env_settings = setts
308 }
309
310 -- | Run Cmd Sugar for the Repl (GHCI)
311 runCmdRepl :: Show err => Cmd' DevEnv err a -> IO a
312 runCmdRepl f = withDevEnv "gargantext.ini" $ \env -> runCmdDev env f
313
314 runCmdReplServantErr :: Cmd' DevEnv ServerError a -> IO a
315 runCmdReplServantErr = runCmdRepl
316
317 -- Use only for dev
318 -- In particular this writes the repo file after running
319 -- the command.
320 -- This function is constrained to the DevEnv rather than
321 -- using HasConnection and HasRepoVar.
322 runCmdDev :: Show err => DevEnv -> Cmd' DevEnv err a -> IO a
323 runCmdDev env f =
324 (either (fail . show) pure =<< runCmd env f)
325 `finally`
326 runReaderT saveRepo env
327
328 -- Use only for dev
329 runCmdDevNoErr :: DevEnv -> Cmd' DevEnv () a -> IO a
330 runCmdDevNoErr = runCmdDev
331
332 -- Use only for dev
333 runCmdDevServantErr :: DevEnv -> Cmd' DevEnv ServerError a -> IO a
334 runCmdDevServantErr = runCmdDev