2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 New corpus means either:
12 - new data in existing corpus
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
24 module Gargantext.API.Corpus.New
28 import Control.Monad.IO.Class (liftIO)
29 import Data.Aeson.TH (deriveJSON)
31 import Servant.Job.Utils (jsonOptions)
32 import Control.Lens hiding (elements)
33 import Servant.Multipart
35 import Data.Text (Text)
36 import GHC.Generics (Generic)
37 import Servant.Job.Types
38 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
39 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
40 import Gargantext.Database.Types.Node (CorpusId)
41 import Gargantext.Text.Terms (TermType(..))
42 import Gargantext.Prelude
43 import Gargantext.API.Orchestrator.Types
45 -- import Servant.Job.Server
46 import Test.QuickCheck (elements)
47 import Test.QuickCheck.Arbitrary
48 import Gargantext.Core (Lang(..))
49 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
50 import qualified Gargantext.Text.Corpus.API as API
51 import Gargantext.Database.Types.Node (UserId)
52 import Gargantext.API.Corpus.New.File
54 data Query = Query { query_query :: Text
55 , query_corpus_id :: Int
56 , query_databases :: [API.ExternalAPIs]
58 deriving (Eq, Show, Generic)
60 deriveJSON (unPrefix "query_") 'Query
63 instance Arbitrary Query where
64 arbitrary = elements [ Query q n fs
67 , fs <- take 3 $ repeat API.externalAPIs
70 instance ToSchema Query where
71 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
73 type Api = Summary "New Corpus endpoint"
74 :> ReqBody '[JSON] Query
75 :> Post '[JSON] CorpusId
76 :<|> Get '[JSON] ApiInfo
78 -- | TODO manage several apis
80 -- TODO this is only the POST
81 api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
82 api _uId (Query q _ as) = do
83 cId <- case head as of
84 Nothing -> flowCorpusSearchInDatabase "user1" EN q
85 Just API.All -> flowCorpusSearchInDatabase "user1" EN q
87 docs <- liftIO $ API.get a q (Just 1000)
88 cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
93 ------------------------------------------------
94 data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
96 instance Arbitrary ApiInfo where
97 arbitrary = ApiInfo <$> arbitrary
99 deriveJSON (unPrefix "") 'ApiInfo
101 instance ToSchema ApiInfo
103 info :: FlowCmdM env err m => UserId -> m ApiInfo
104 info _u = pure $ ApiInfo API.externalAPIs
107 -- Proposal to replace the Query type which seems to generically named.
108 data ScraperInput = ScraperInput
109 { _scin_query :: !Text
110 , _scin_corpus_id :: !Int
111 , _scin_databases :: [API.ExternalAPIs]
113 deriving (Eq, Show, Generic)
115 makeLenses ''ScraperInput
117 deriveJSON (unPrefix "_scin_") 'ScraperInput
119 data ScraperEvent = ScraperEvent
120 { _scev_message :: !(Maybe Text)
121 , _scev_level :: !(Maybe Text)
122 , _scev_date :: !(Maybe Text)
126 deriveJSON (unPrefix "_scev_") 'ScraperEvent
128 data ScraperStatus = ScraperStatus
129 { _scst_succeeded :: !(Maybe Int)
130 , _scst_failed :: !(Maybe Int)
131 , _scst_remaining :: !(Maybe Int)
132 , _scst_events :: !(Maybe [ScraperEvent])
136 deriveJSON (unPrefix "_scst_") 'ScraperStatus
141 ------------------------------------------------------------------------
142 ------------------------------------------------------------------------
143 data WithQuery = WithQuery
145 , _wq_databases :: ![ExternalAPIs]
149 makeLenses ''WithQuery
151 instance FromJSON WithQuery where
152 parseJSON = genericParseJSON $ jsonOptions "_wq_"
154 instance ToSchema WithQuery
156 data WithForm = WithForm
157 { _wf_filetype :: !FileType
161 makeLenses ''WithForm
163 instance FromJSON WithForm where
164 parseJSON = genericParseJSON $ jsonOptions "_wf_"
166 instance ToSchema WithForm
169 ------------------------------------------------------------------------
171 AddAPI withInput = AsyncJobsAPI ScraperStatus withInput ScraperStatus
172 ------------------------------------------------------------------------
174 type AddWithQuery = Summary "Add to corpus endpoint"
176 :> Capture "corpus_id" CorpusId
182 type AddWithFile = Summary "Add to corpus endpoint"
184 :> Capture "corpus_id" CorpusId
187 :> MultipartForm Mem (MultipartData Mem)
188 :> QueryParam "fileType" FileType
192 type AddWithForm = Summary "Add to corpus endpoint"
194 :> Capture "corpus_id" CorpusId
201 ------------------------------------------------------------------------
202 -- TODO WithQuery also has a corpus id
203 addToCorpusJobFunction :: FlowCmdM env err m
206 -> (ScraperStatus -> m ())
208 addToCorpusJobFunction _cid _input logStatus = do
210 logStatus ScraperStatus { _scst_succeeded = Just 10
211 , _scst_failed = Just 2
212 , _scst_remaining = Just 138
213 , _scst_events = Just []
216 pure ScraperStatus { _scst_succeeded = Just 137
217 , _scst_failed = Just 13
218 , _scst_remaining = Just 0
219 , _scst_events = Just []
223 addToCorpusWithFile :: FlowCmdM env err m
227 -> (ScraperStatus -> m ())
229 addToCorpusWithFile cid input filetype logStatus = do
230 logStatus ScraperStatus { _scst_succeeded = Just 10
231 , _scst_failed = Just 2
232 , _scst_remaining = Just 138
233 , _scst_events = Just []
235 _h <- postUpload cid filetype input
237 pure ScraperStatus { _scst_succeeded = Just 137
238 , _scst_failed = Just 13
239 , _scst_remaining = Just 0
240 , _scst_events = Just []
243 addToCorpusWithForm :: FlowCmdM env err m
246 -> (ScraperStatus -> m ())
248 addToCorpusWithForm _cid (WithForm ft d) logStatus = do
249 logStatus ScraperStatus { _scst_succeeded = Just 10
250 , _scst_failed = Just 2
251 , _scst_remaining = Just 138
252 , _scst_events = Just []
254 _ <- putStrLn $ show ft
255 _ <- putStrLn $ show d
257 pure ScraperStatus { _scst_succeeded = Just 137
258 , _scst_failed = Just 13
259 , _scst_remaining = Just 0
260 , _scst_events = Just []