- new data in existing corpus
-}
-{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
-{-# LANGUAGE DeriveGeneric #-}
-{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RankNTypes #-}
-{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.API.Node.Corpus.New
where
import Data.Maybe (fromMaybe)
import Data.Swagger
import Data.Text (Text)
+import qualified Data.Text as T
import GHC.Generics (Generic)
-import Gargantext.API.Admin.Orchestrator.Types (ScraperStatus(..))
+import Servant
+import Servant.Job.Utils (jsonOptions)
+-- import Servant.Multipart
+-- import Test.QuickCheck (elements)
+import Test.QuickCheck.Arbitrary
+
+import Gargantext.Prelude
+
+import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import qualified Gargantext.API.Admin.Orchestrator.Types as T
+import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Node.Corpus.New.File
+import Gargantext.API.Node.Types
import Gargantext.Core (Lang(..){-, allLangs-})
+import Gargantext.Database.Action.Mail (sendMail)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Flow (FlowCmdM, flowCorpus, getDataText, flowDataText, TermType(..), DataOrigin(..){-, allDataOrigins-})
-import Gargantext.Database.Admin.Types.Node (CorpusId, ToHyperdataDocument(..), UserId)
-import Gargantext.Prelude
-import Servant
-import Servant.Job.Core
-import Servant.Job.Types
-import Servant.Job.Utils (jsonOptions)
--- import Servant.Multipart
--- import Test.QuickCheck (elements)
-import Test.QuickCheck.Arbitrary
-import Web.FormUrlEncoded (FromForm)
-import qualified Gargantext.Text.Corpus.API as API
-import qualified Gargantext.Text.Corpus.Parsers as Parser (FileFormat(..), parseFormat)
+import Gargantext.Database.Action.User (getUserId)
+import Gargantext.Database.Action.Node (mkNodeWithParent)
+import Gargantext.Database.Admin.Types.Hyperdata
+import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), UserId)
+import Gargantext.Database.Query.Table.Node (getNodeWith)
+import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
+import Gargantext.Database.Schema.Node (node_hyperdata)
+import qualified Gargantext.Prelude.Utils as GPU
+import qualified Gargantext.Core.Text.Corpus.API as API
+import qualified Gargantext.Core.Text.Corpus.Parsers as Parser (FileFormat(..), parseFormat)
------------------------------------------------------------------------
{-
instance ToSchema WithQuery where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wq_")
--------------------------------------------------------
-data WithForm = WithForm
- { _wf_filetype :: !FileType
- , _wf_data :: !Text
- , _wf_lang :: !(Maybe Lang)
- , _wf_name :: !Text
- } deriving (Eq, Show, Generic)
-
-makeLenses ''WithForm
-instance FromForm WithForm
-instance FromJSON WithForm where
- parseJSON = genericParseJSON $ jsonOptions "_wf_"
-instance ToSchema WithForm where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
-
-------------------------------------------------------------------------
-type AsyncJobs event ctI input output =
- AsyncJobsAPI' 'Unsafe 'Safe ctI '[JSON] Maybe event input output
------------------------------------------------------------------------
type AddWithQuery = Summary "Add with Query to corpus endpoint"
:> "corpus"
:> Capture "corpus_id" CorpusId
:> "query"
- :> AsyncJobs ScraperStatus '[JSON] WithQuery ScraperStatus
+ :> AsyncJobs JobLog '[JSON] WithQuery JobLog
{-
type AddWithFile = Summary "Add with MultipartData to corpus endpoint"
:> MultipartForm Mem (MultipartData Mem)
:> QueryParam "fileType" FileType
:> "async"
- :> AsyncJobs ScraperStatus '[JSON] () ScraperStatus
+ :> AsyncJobs JobLog '[JSON] () JobLog
-}
-type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
- :> "corpus"
- :> Capture "corpus_id" CorpusId
- :> "add"
- :> "form"
- :> "async"
- :> AsyncJobs ScraperStatus '[FormUrlEncoded] WithForm ScraperStatus
-
------------------------------------------------------------------------
-- TODO WithQuery also has a corpus id
=> User
-> CorpusId
-> WithQuery
- -> (ScraperStatus -> m ())
- -> m ScraperStatus
-addToCorpusWithQuery u cid (WithQuery q dbs l _nid) logStatus = do
+ -> Maybe Integer
+ -> (JobLog -> m ())
+ -> m JobLog
+addToCorpusWithQuery user cid (WithQuery q dbs l _nid) maybeLimit logStatus = do
-- TODO ...
- logStatus ScraperStatus { _scst_succeeded = Just 10
- , _scst_failed = Just 2
- , _scst_remaining = Just 138
- , _scst_events = Just []
- }
- printDebug "addToCorpusWithQuery" cid
+ logStatus JobLog { _scst_succeeded = Just 0
+ , _scst_failed = Just 0
+ , _scst_remaining = Just 5
+ , _scst_events = Just []
+ }
+ printDebug "addToCorpusWithQuery" (cid, dbs)
-- TODO add cid
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
- txts <- mapM (\db -> getDataText db (Multi l) q (Just 10000)) [database2origin dbs]
- cids <- mapM (\txt -> flowDataText u txt (Multi l) cid) txts
+ txts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs]
+
+ logStatus JobLog { _scst_succeeded = Just 2
+ , _scst_failed = Just 0
+ , _scst_remaining = Just 1
+ , _scst_events = Just []
+ }
+
+ cids <- mapM (\txt -> flowDataText user txt (Multi l) cid) txts
printDebug "corpus id" cids
+ printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
+ sendMail user
-- TODO ...
- pure ScraperStatus { _scst_succeeded = Just 137
- , _scst_failed = Just 13
- , _scst_remaining = Just 0
- , _scst_events = Just []
- }
+ pure JobLog { _scst_succeeded = Just 3
+ , _scst_failed = Just 0
+ , _scst_remaining = Just 0
+ , _scst_events = Just []
+ }
+
+
+type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
+ :> "corpus"
+ :> Capture "corpus_id" CorpusId
+ :> "add"
+ :> "form"
+ :> "async"
+ :> AsyncJobs JobLog '[FormUrlEncoded] NewWithForm JobLog
addToCorpusWithForm :: FlowCmdM env err m
=> User
-> CorpusId
- -> WithForm
- -> (ScraperStatus -> m ())
- -> m ScraperStatus
-addToCorpusWithForm user cid (WithForm ft d l _n) logStatus = do
-
+ -> NewWithForm
+ -> (JobLog -> m ())
+ -> m JobLog
+addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus = do
+
+ printDebug "[addToCorpusWithForm] Parsing corpus: " cid
+ printDebug "[addToCorpusWithForm] fileType" ft
+ logStatus JobLog { _scst_succeeded = Just 0
+ , _scst_failed = Just 0
+ , _scst_remaining = Just 2
+ , _scst_events = Just []
+ }
let
parse = case ft of
CSV_HAL -> Parser.parseFormat Parser.CsvHal
WOS -> Parser.parseFormat Parser.WOS
PresseRIS -> Parser.parseFormat Parser.RisPresse
- logStatus ScraperStatus { _scst_succeeded = Just 1
- , _scst_failed = Just 0
- , _scst_remaining = Just 1
- , _scst_events = Just []
- }
-
- printDebug "Parsing corpus: " cid
-
-- TODO granularity of the logStatus
docs <- liftBase $ splitEvery 500
<$> take 1000000
<$> parse (cs d)
printDebug "Parsing corpus finished : " cid
- printDebug "Starting extraction : " cid
+ logStatus JobLog { _scst_succeeded = Just 1
+ , _scst_failed = Just 0
+ , _scst_remaining = Just 1
+ , _scst_events = Just []
+ }
+
+ printDebug "Starting extraction : " cid
-- TODO granularity of the logStatus
_cid' <- flowCorpus user
(Right [cid])
(map (map toHyperdataDocument) docs)
printDebug "Extraction finished : " cid
+ printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
+ sendMail user
- pure ScraperStatus { _scst_succeeded = Just 2
- , _scst_failed = Just 0
- , _scst_remaining = Just 0
- , _scst_events = Just []
- }
+ pure JobLog { _scst_succeeded = Just 2
+ , _scst_failed = Just 0
+ , _scst_remaining = Just 0
+ , _scst_events = Just []
+ }
{-
addToCorpusWithFile :: FlowCmdM env err m
=> CorpusId
-> MultipartData Mem
-> Maybe FileType
- -> (ScraperStatus -> m ())
- -> m ScraperStatus
+ -> (JobLog -> m ())
+ -> m JobLog
addToCorpusWithFile cid input filetype logStatus = do
- logStatus ScraperStatus { _scst_succeeded = Just 10
+ logStatus JobLog { _scst_succeeded = Just 10
, _scst_failed = Just 2
, _scst_remaining = Just 138
, _scst_events = Just []
printDebug "addToCorpusWithFile" cid
_h <- postUpload cid filetype input
- pure ScraperStatus { _scst_succeeded = Just 137
+ pure JobLog { _scst_succeeded = Just 137
, _scst_failed = Just 13
, _scst_remaining = Just 0
, _scst_events = Just []
-}
+
+type AddWithFile = Summary "Add with FileUrlEncoded to corpus endpoint"
+ :> "corpus"
+ :> Capture "corpus_id" CorpusId
+ :> "add"
+ :> "file"
+ :> "async"
+ :> AsyncJobs JobLog '[FormUrlEncoded] NewWithFile JobLog
+
+addToCorpusWithFile :: (HasSettings env, FlowCmdM env err m)
+ => User
+ -> CorpusId
+ -> NewWithFile
+ -> (JobLog -> m ())
+ -> m JobLog
+addToCorpusWithFile user cid nwf@(NewWithFile _d _l fName) logStatus = do
+
+ printDebug "[addToCorpusWithFile] Uploading file to corpus: " cid
+ logStatus JobLog { _scst_succeeded = Just 0
+ , _scst_failed = Just 0
+ , _scst_remaining = Just 1
+ , _scst_events = Just []
+ }
+
+ fPath <- GPU.writeFile nwf
+ printDebug "[addToCorpusWithFile] File saved as: " fPath
+
+ uId <- getUserId user
+ nIds <- mkNodeWithParent NodeFile (Just cid) uId fName
+
+ _ <- case nIds of
+ [nId] -> do
+ node <- getNodeWith nId (Proxy :: Proxy HyperdataFile)
+ let hl = node ^. node_hyperdata
+ _ <- updateHyperdata nId $ hl { _hff_name = fName
+ , _hff_path = T.pack fPath }
+
+ printDebug "[addToCorpusWithFile] Created node with id: " nId
+ _ -> pure ()
+
+ printDebug "[addToCorpusWithFile] File upload to corpus finished: " cid
+
+ printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
+ sendMail user
+
+ pure $ JobLog { _scst_succeeded = Just 1
+ , _scst_failed = Just 0
+ , _scst_remaining = Just 0
+ , _scst_events = Just []
+ }