2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 New corpus means either:
12 - new data in existing corpus
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
24 module Gargantext.API.Corpus.New
27 --import Debug.Trace (trace)
28 import Web.FormUrlEncoded (FromForm)
30 import Control.Monad.IO.Class (liftIO)
31 import Data.Aeson.TH (deriveJSON)
33 import Servant.Job.Utils (jsonOptions)
34 import Control.Lens hiding (elements)
35 import Servant.Multipart
37 import Data.Text (Text)
38 import GHC.Generics (Generic)
39 import Servant.Job.Types
40 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
41 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
42 import Gargantext.Database.Types.Node (CorpusId)
43 import Gargantext.Text.Terms (TermType(..))
44 import Gargantext.Prelude
45 import Gargantext.API.Orchestrator.Types
47 -- import Servant.Job.Server
48 import Test.QuickCheck (elements)
49 import Test.QuickCheck.Arbitrary
50 import Gargantext.Core (Lang(..))
51 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
52 import qualified Gargantext.Text.Corpus.API as API
53 import Gargantext.Database.Types.Node (UserId)
54 import Gargantext.API.Corpus.New.File
56 data Query = Query { query_query :: Text
57 , query_corpus_id :: Int
58 , query_databases :: [API.ExternalAPIs]
60 deriving (Eq, Show, Generic)
62 deriveJSON (unPrefix "query_") 'Query
65 instance Arbitrary Query where
66 arbitrary = elements [ Query q n fs
69 , fs <- take 3 $ repeat API.externalAPIs
72 instance ToSchema Query where
73 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
75 type Api = Summary "New Corpus endpoint"
76 :> ReqBody '[JSON] Query
77 :> Post '[JSON] CorpusId
78 :<|> Get '[JSON] ApiInfo
80 -- | TODO manage several apis
82 -- TODO this is only the POST
83 api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
84 api _uId (Query q _ as) = do
85 cId <- case head as of
86 Nothing -> flowCorpusSearchInDatabase "user1" EN q
87 Just API.All -> flowCorpusSearchInDatabase "user1" EN q
89 docs <- liftIO $ API.get a q (Just 1000)
90 cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
95 ------------------------------------------------
96 data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
98 instance Arbitrary ApiInfo where
99 arbitrary = ApiInfo <$> arbitrary
101 deriveJSON (unPrefix "") 'ApiInfo
103 instance ToSchema ApiInfo
105 info :: FlowCmdM env err m => UserId -> m ApiInfo
106 info _u = pure $ ApiInfo API.externalAPIs
109 -- Proposal to replace the Query type which seems to generically named.
110 data ScraperInput = ScraperInput
111 { _scin_query :: !Text
112 , _scin_corpus_id :: !Int
113 , _scin_databases :: [API.ExternalAPIs]
115 deriving (Eq, Show, Generic)
117 makeLenses ''ScraperInput
119 deriveJSON (unPrefix "_scin_") 'ScraperInput
121 data ScraperEvent = ScraperEvent
122 { _scev_message :: !(Maybe Text)
123 , _scev_level :: !(Maybe Text)
124 , _scev_date :: !(Maybe Text)
128 deriveJSON (unPrefix "_scev_") 'ScraperEvent
130 data ScraperStatus = ScraperStatus
131 { _scst_succeeded :: !(Maybe Int)
132 , _scst_failed :: !(Maybe Int)
133 , _scst_remaining :: !(Maybe Int)
134 , _scst_events :: !(Maybe [ScraperEvent])
138 deriveJSON (unPrefix "_scst_") 'ScraperStatus
143 ------------------------------------------------------------------------
144 ------------------------------------------------------------------------
145 data WithQuery = WithQuery
147 , _wq_databases :: ![ExternalAPIs]
151 makeLenses ''WithQuery
153 instance FromJSON WithQuery where
154 parseJSON = genericParseJSON $ jsonOptions "_wq_"
156 instance ToSchema WithQuery
157 -------------------------------------------------------
158 data WithForm = WithForm
159 { _wf_filetype :: !FileType
161 } deriving (Eq, Show, Generic)
163 makeLenses ''WithForm
165 instance FromJSON WithForm where
166 parseJSON = genericParseJSON $ jsonOptions "_wf_"
167 instance ToSchema WithForm
168 instance FromForm WithForm
171 ------------------------------------------------------------------------
173 AddAPI withInput = AsyncJobsAPI ScraperStatus withInput ScraperStatus
174 ------------------------------------------------------------------------
176 type AddWithQuery = Summary "Add with Query to corpus endpoint"
178 :> Capture "corpus_id" CorpusId
184 type AddWithFile = Summary "Add with MultipartData to corpus endpoint"
186 :> Capture "corpus_id" CorpusId
189 :> MultipartForm Mem (MultipartData Mem)
190 :> QueryParam "fileType" FileType
194 type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
196 :> Capture "corpus_id" CorpusId
199 :> ReqBody '[FormUrlEncoded] WithForm
203 ------------------------------------------------------------------------
204 -- TODO WithQuery also has a corpus id
205 addToCorpusJobFunction :: FlowCmdM env err m
208 -> (ScraperStatus -> m ())
210 addToCorpusJobFunction _cid _input logStatus = do
212 logStatus ScraperStatus { _scst_succeeded = Just 10
213 , _scst_failed = Just 2
214 , _scst_remaining = Just 138
215 , _scst_events = Just []
218 pure ScraperStatus { _scst_succeeded = Just 137
219 , _scst_failed = Just 13
220 , _scst_remaining = Just 0
221 , _scst_events = Just []
225 addToCorpusWithFile :: FlowCmdM env err m
229 -> (ScraperStatus -> m ())
231 addToCorpusWithFile cid input filetype logStatus = do
232 logStatus ScraperStatus { _scst_succeeded = Just 10
233 , _scst_failed = Just 2
234 , _scst_remaining = Just 138
235 , _scst_events = Just []
237 _h <- postUpload cid filetype input
239 pure ScraperStatus { _scst_succeeded = Just 137
240 , _scst_failed = Just 13
241 , _scst_remaining = Just 0
242 , _scst_events = Just []
245 addToCorpusWithForm :: FlowCmdM env err m
248 -> (ScraperStatus -> m ())
250 addToCorpusWithForm _cid (WithForm ft d) logStatus = do
251 printDebug "filetype" ft
252 putStrLn ("data" <> show d)
253 logStatus ScraperStatus { _scst_succeeded = Just 10
254 , _scst_failed = Just 2
255 , _scst_remaining = Just 138
256 , _scst_events = Just []
259 pure ScraperStatus { _scst_succeeded = Just 137
260 , _scst_failed = Just 13
261 , _scst_remaining = Just 0
262 , _scst_events = Just []