{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.API.Corpus.New
where
--import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat(..))
import Control.Lens hiding (elements)
import Control.Monad.IO.Class (liftIO)
+import Control.Concurrent
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
+import Data.Maybe (fromMaybe)
import Data.Either
import Data.Swagger
import Data.Text (Text)
import Gargantext.Database.Types.Node (ToHyperdataDocument(..))
import Gargantext.Database.Types.Node (UserId)
import Gargantext.Prelude
-import Gargantext.Text.Corpus.Parsers (FileFormat(..), parseFormat)
+import qualified Gargantext.Text.Corpus.Parsers as Parser (FileFormat(..), parseFormat)
import Gargantext.Text.Terms (TermType(..))
import Servant
import Servant.API.Flatten (Flat)
instance ToSchema Query where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
-type Api = Summary "New Corpus endpoint"
- :> ReqBody '[JSON] Query
- :> Post '[JSON] CorpusId
- :<|> Get '[JSON] ApiInfo
+------------------------------------------------------------------------
+
+type Api = PostApi
+ :<|> GetApi
+
+type PostApi = Summary "New Corpus endpoint"
+ :> ReqBody '[JSON] Query
+ :> Post '[JSON] CorpusId
+type GetApi = Get '[JSON] ApiInfo
-- | TODO manage several apis
-- TODO-ACCESS
data WithForm = WithForm
{ _wf_filetype :: !FileType
, _wf_data :: !Text
+ , _wf_lang :: !(Maybe Lang)
+ , _wf_name :: !Text
} deriving (Eq, Show, Generic)
makeLenses ''WithForm
type Upload = Summary "Corpus Upload endpoint"
:> "corpus"
:> Capture "corpus_id" CorpusId
- :<|> "addWithquery" :> AsyncJobsAPI ScraperStatus WithQuery ScraperStatus
- :<|> "addWithfile" :> AsyncJobs ScraperStatus '[FormUrlEncoded] WithForm ScraperStatus
+ :<|> "addWithquery" :> AsyncJobsAPI ScraperStatus WithQuery ScraperStatus
+ :<|> "addWithfile" :> AsyncJobs ScraperStatus '[FormUrlEncoded] WithForm ScraperStatus
type AddWithQuery = Summary "Add with Query to corpus endpoint"
, _scst_events = Just []
}
+{- | Model to fork the flow
+-- This is not really optimized since it increases the need RAM
+-- and freezes the whole system
+-- This is mainly for documentation (see a better solution in the function below)
+-- Each process has to be tailored
+addToCorpusWithForm' :: FlowCmdM env err m
+ => CorpusId
+ -> WithForm
+ -> (ScraperStatus -> m ())
+ -> m ScraperStatus
+addToCorpusWithForm' cid (WithForm ft d l) logStatus = do
+ newStatus <- liftIO newEmptyMVar
+ s <- addToCorpusWithForm cid (WithForm ft d l) logStatus
+ _ <- liftIO $ forkIO $ putMVar newStatus s
+ s' <- liftIO $ takeMVar newStatus
+ pure s'
+-}
addToCorpusWithForm :: FlowCmdM env err m
=> CorpusId
-> WithForm
-> (ScraperStatus -> m ())
-> m ScraperStatus
-addToCorpusWithForm cid (WithForm ft d) logStatus = do
+addToCorpusWithForm cid (WithForm ft d l _n) logStatus = do
printDebug "ft" ft
let
parse = case ft of
- CSV_HAL -> parseFormat CsvHal
- CSV -> parseFormat CsvGargV3
- _ -> parseFormat CsvHal
+ CSV_HAL -> Parser.parseFormat Parser.CsvHal
+ CSV -> Parser.parseFormat Parser.CsvGargV3
+ WOS -> Parser.parseFormat Parser.WOS
+ PresseRIS -> Parser.parseFormat Parser.RisPresse
+ newDocs <- liftIO newEmptyMVar
docs <- liftIO
$ splitEvery 500
<$> take 1000000
<$> parse (cs d)
+ _ <- liftIO $ forkIO $ putMVar newDocs docs
logStatus ScraperStatus { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_events = Just []
}
- cid' <- flowCorpus "user1" (Right [cid]) (Multi EN) (map (map toHyperdataDocument) docs)
- printDebug "cid'" cid'
+ docs' <- liftIO $ takeMVar newDocs
+ newCid <- liftIO newEmptyMVar
+ cid' <- flowCorpus "user1"
+ (Right [cid])
+ (Multi $ fromMaybe EN l)
+ (map (map toHyperdataDocument) docs')
+ _ <- liftIO $ forkIO $ putMVar newCid cid'
+
+ cid'' <- liftIO $ takeMVar newCid
+ printDebug "cid'" cid''
pure ScraperStatus { _scst_succeeded = Just 2
, _scst_failed = Just 0