2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 New corpus means either:
12 - new data in existing corpus
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
24 module Gargantext.API.Corpus.New
27 import Web.FormUrlEncoded (FromForm)
29 import Control.Monad.IO.Class (liftIO)
30 import Data.Aeson.TH (deriveJSON)
32 import Servant.Job.Utils (jsonOptions)
33 import Control.Lens hiding (elements)
34 import Servant.Multipart
36 import Data.Text (Text)
37 import GHC.Generics (Generic)
38 import Servant.Job.Types
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
41 import Gargantext.Database.Types.Node (CorpusId)
42 import Gargantext.Text.Terms (TermType(..))
43 import Gargantext.Prelude
44 import Gargantext.API.Orchestrator.Types
46 -- import Servant.Job.Server
47 import Test.QuickCheck (elements)
48 import Test.QuickCheck.Arbitrary
49 import Gargantext.Core (Lang(..))
50 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
51 import qualified Gargantext.Text.Corpus.API as API
52 import Gargantext.Database.Types.Node (UserId)
53 import Gargantext.API.Corpus.New.File
-- | JSON body accepted by the POST route of 'Api'.
data Query = Query
  { query_query     :: Text
    -- ^ Query text handed to the search / external API calls.
  , query_corpus_id :: Int
    -- ^ Corpus identifier sent by the client (the 'api' handler ignores it).
  , query_databases :: [API.ExternalAPIs]
    -- ^ Requested external sources.
  }
  deriving (Eq, Show, Generic)

-- JSON instances generated by Template Haskell, using the options built by
-- @unPrefix "query_"@.
$(deriveJSON (unPrefix "query_") 'Query)
-- QuickCheck instance: 'arbitrary' picks one element, via 'elements', out of a
-- list of 'Query' values built by a list comprehension.
-- NOTE(review): the generators binding @q@ and @n@ are not visible in this
-- excerpt; only the @fs@ generator is shown.
64 instance Arbitrary Query where
65 arbitrary = elements [ Query q n fs
-- @fs@ ranges over three copies of the full 'API.externalAPIs' list, so every
-- (q, n) combination appears three times with the same database list.
68 , fs <- take 3 $ repeat API.externalAPIs
-- | Swagger schema for 'Query', derived generically with the schema options
-- returned by @unPrefixSwagger "query_"@.
instance ToSchema Query where
  declareNamedSchema proxy =
    genericDeclareNamedSchema (unPrefixSwagger "query_") proxy
-- | Routes of the new-corpus endpoint:
--
-- * @POST@ with a JSON 'Query' body, answering with a JSON 'CorpusId'
--   (the 'Summary' annotates this branch only);
-- * @GET@ answering with a JSON 'ApiInfo'.
--
-- The parentheses only spell out the grouping that the operator fixities
-- (@:>@ binds tighter than @:\<|>@) already impose.
type Api =
       ( Summary "New Corpus endpoint"
           :> ReqBody '[JSON] Query
           :> Post '[JSON] CorpusId
       )
  :<|> Get '[JSON] ApiInfo
79 -- | TODO manage several apis
81 -- TODO this is only the POST
-- Builds a corpus from a 'Query' and yields its 'CorpusId'.
-- The 'UserId' argument and the corpus id stored in the 'Query' are not used.
-- NOTE(review): the user name "user1" is hard-coded in every visible branch —
-- confirm whether it should be derived from the 'UserId' argument.
82 api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
83 api _uId (Query q _ as) = do
-- Only the first requested source is inspected ('head' yields a 'Maybe' here);
-- the rest of the list is never looked at.
84 cId <- case head as of
-- Empty source list, or 'API.All': delegate to 'flowCorpusSearchInDatabase'.
85 Nothing -> flowCorpusSearchInDatabase "user1" EN q
86 Just API.All -> flowCorpusSearchInDatabase "user1" EN q
-- NOTE(review): the case alternative that binds @a@ is not visible in this
-- excerpt. The two lines below fetch documents with 'API.get' (the
-- @Just 1000@ argument is presumably a result limit — TODO confirm) and pass
-- them, as a single batch, to 'flowCorpus'.
88 docs <- liftIO $ API.get a q (Just 1000)
89 cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
94 ------------------------------------------------
-- Response type of the GET route of 'Api': wraps a list of external APIs.
-- NOTE(review): a deriving clause, if any, is not visible in this excerpt.
95 data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
-- | A random 'ApiInfo' wraps an arbitrary list of external APIs.
instance Arbitrary ApiInfo where
  arbitrary = do
    apis <- arbitrary
    pure (ApiInfo apis)

-- JSON instances generated by Template Haskell, using the options built by
-- @unPrefix ""@.
$(deriveJSON (unPrefix "") 'ApiInfo)

-- | Swagger schema taken from the class defaults.
instance ToSchema ApiInfo
-- | Returns an 'ApiInfo' wrapping 'API.externalAPIs'; the user id argument is
-- ignored.
info :: FlowCmdM env err m => UserId -> m ApiInfo
info _ = pure (ApiInfo API.externalAPIs)
108 -- Proposal to replace the Query type, which seems too generically named.
-- Record with the same three visible fields as 'Query' (query text, corpus
-- id, databases), under the @_scin_@ prefix; the first two fields are strict.
-- NOTE(review): the line closing the record is not visible in this excerpt.
109 data ScraperInput = ScraperInput
110 { _scin_query :: !Text
111 , _scin_corpus_id :: !Int
112 , _scin_databases :: [API.ExternalAPIs]
114 deriving (Eq, Show, Generic)
-- Template Haskell: lenses for the fields, then JSON instances using the
-- options built by @unPrefix "_scin_"@.
116 makeLenses ''ScraperInput
118 deriveJSON (unPrefix "_scin_") 'ScraperInput
-- Event record; each visible field is a strict, optional 'Text'.
120 data ScraperEvent = ScraperEvent
121 { _scev_message :: !(Maybe Text)
122 , _scev_level :: !(Maybe Text)
123 , _scev_date :: !(Maybe Text)
-- NOTE(review): the end of the declaration (closing brace, deriving clause)
-- is not visible in this excerpt.
-- Template Haskell: JSON instances using the options built by
-- @unPrefix "_scev_"@.
127 deriveJSON (unPrefix "_scev_") 'ScraperEvent
-- Status record that the addToCorpus* jobs in this file pass to their logging
-- callback and return as their result. Every visible field is strict and
-- optional: three 'Int' counters and a list of 'ScraperEvent'.
129 data ScraperStatus = ScraperStatus
130 { _scst_succeeded :: !(Maybe Int)
131 , _scst_failed :: !(Maybe Int)
132 , _scst_remaining :: !(Maybe Int)
133 , _scst_events :: !(Maybe [ScraperEvent])
-- NOTE(review): the end of the declaration (closing brace, deriving clause)
-- is not visible in this excerpt.
-- Template Haskell: JSON instances using the options built by
-- @unPrefix "_scst_"@.
137 deriveJSON (unPrefix "_scst_") 'ScraperStatus
142 ------------------------------------------------------------------------
143 ------------------------------------------------------------------------
-- Input record for the query-based "add to corpus" job.
-- NOTE(review): only the @_wq_databases@ field is visible in this excerpt; the
-- first field and the end of the declaration are missing.
144 data WithQuery = WithQuery
146 , _wq_databases :: ![ExternalAPIs]
-- Template Haskell: lenses for the fields of 'WithQuery'.
150 makeLenses ''WithQuery
-- | Generic JSON decoding, configured by @jsonOptions "_wq_"@.
instance FromJSON WithQuery where
  parseJSON value = genericParseJSON (jsonOptions "_wq_") value

-- | Swagger schema taken from the class defaults.
instance ToSchema WithQuery
156 -------------------------------------------------------
-- Input record for the form-based "add to corpus" job.
-- NOTE(review): 'addToCorpusWithForm' matches this constructor with two
-- arguments, but only the first field is visible in this excerpt.
157 data WithForm = WithForm
158 { _wf_filetype :: !FileType
160 } deriving (Eq, Show, Generic)
-- Template Haskell: lenses for the fields of 'WithForm'.
162 makeLenses ''WithForm
-- | Generic JSON decoding, configured by @jsonOptions "_wf_"@.
instance FromJSON WithForm where
  parseJSON value = genericParseJSON (jsonOptions "_wf_") value

-- | Swagger schema taken from the class defaults.
instance ToSchema WithForm

-- | Url-encoded form decoding taken from the class defaults.
instance FromForm WithForm
170 ------------------------------------------------------------------------
-- 'AsyncJobsAPI' specialised so that 'ScraperStatus' fills its first and third
-- arguments, leaving only the input type @withInput@ as a parameter.
-- NOTE(review): the declaration keyword is not visible in this excerpt —
-- presumably a @type@ synonym; confirm.
172 AddAPI withInput = AsyncJobsAPI ScraperStatus withInput ScraperStatus
173 ------------------------------------------------------------------------
-- Route type for adding to a corpus from a query; the visible part shows the
-- summary text and a @corpus_id@ path capture of type 'CorpusId'.
-- NOTE(review): the remaining route segments are not visible in this excerpt.
175 type AddWithQuery = Summary "Add to corpus endpoint"
177 :> Capture "corpus_id" CorpusId
-- Route type for adding to a corpus from an uploaded file; the visible part
-- shows a @corpus_id@ path capture, a multipart form using the 'Mem' backend
-- tag, and a @fileType@ query parameter of type 'FileType'.
-- NOTE(review): the remaining route segments are not visible in this excerpt.
183 type AddWithFile = Summary "Add to corpus endpoint"
185 :> Capture "corpus_id" CorpusId
188 :> MultipartForm Mem (MultipartData Mem)
189 :> QueryParam "fileType" FileType
-- Route type for adding to a corpus from a url-encoded form; the visible part
-- shows a @corpus_id@ path capture and a 'WithForm' request body.
-- NOTE(review): the remaining route segments are not visible in this excerpt.
193 type AddWithForm = Summary "Add to corpus endpoint"
195 :> Capture "corpus_id" CorpusId
198 :> ReqBody '[FormUrlEncoded] WithForm
202 ------------------------------------------------------------------------
203 -- TODO WithQuery also has a corpus id
-- Job body that, in the visible lines, performs no real work: it reports one
-- hard-coded intermediate status through the logging callback and returns a
-- hard-coded final status. @_cid@ and @_input@ are not used.
-- NOTE(review): parts of the type signature and the closing braces of the
-- record expressions are not visible in this excerpt.
204 addToCorpusJobFunction :: FlowCmdM env err m
207 -> (ScraperStatus -> m ())
209 addToCorpusJobFunction _cid _input logStatus = do
-- Intermediate report sent through the callback.
211 logStatus ScraperStatus { _scst_succeeded = Just 10
212 , _scst_failed = Just 2
213 , _scst_remaining = Just 138
214 , _scst_events = Just []
-- Final status handed back to the caller.
217 pure ScraperStatus { _scst_succeeded = Just 137
218 , _scst_failed = Just 13
219 , _scst_remaining = Just 0
220 , _scst_events = Just []
-- Job body for file uploads: reports a hard-coded intermediate status, calls
-- 'postUpload' with the corpus id, file type and input, then returns a
-- hard-coded final status.
-- NOTE(review): parts of the type signature and the closing braces of the
-- record expressions are not visible in this excerpt.
224 addToCorpusWithFile :: FlowCmdM env err m
228 -> (ScraperStatus -> m ())
230 addToCorpusWithFile cid input filetype logStatus = do
-- Intermediate report sent through the callback.
231 logStatus ScraperStatus { _scst_succeeded = Just 10
232 , _scst_failed = Just 2
233 , _scst_remaining = Just 138
234 , _scst_events = Just []
-- The upload result is bound to @_h@ and never used; the status returned
-- below does not depend on it.
236 _h <- postUpload cid filetype input
-- Final status handed back to the caller.
238 pure ScraperStatus { _scst_succeeded = Just 137
239 , _scst_failed = Just 13
240 , _scst_remaining = Just 0
241 , _scst_events = Just []
244 addToCorpusWithForm :: FlowCmdM env err m
247 -> (ScraperStatus -> m ())
249 addToCorpusWithForm _cid (WithForm ft d) logStatus = do
250 logStatus ScraperStatus { _scst_succeeded = Just 10
251 , _scst_failed = Just 2
252 , _scst_remaining = Just 138
253 , _scst_events = Just []
255 _ <- putStrLn $ show ft
256 _ <- putStrLn $ show d
258 pure ScraperStatus { _scst_succeeded = Just 137
259 , _scst_failed = Just 13
260 , _scst_remaining = Just 0
261 , _scst_events = Just []