]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Corpus/New.hs
[DB] Master User Texts
[gargantext.git] / src / Gargantext / API / Corpus / New.hs
1 {-|
2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 New corpus means either:
11 - new corpus
12 - new data in existing corpus
13 -}
14
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
23
24 module Gargantext.API.Corpus.New
25 where
26
27 import Web.FormUrlEncoded (FromForm)
28 import Data.Either
29 import Control.Monad.IO.Class (liftIO)
30 import Data.Aeson.TH (deriveJSON)
31 import Data.Aeson
32 import Servant.Job.Utils (jsonOptions)
33 import Control.Lens hiding (elements)
34 import Servant.Multipart
35 import Data.Swagger
36 import Data.Text (Text)
37 import GHC.Generics (Generic)
38 import Servant.Job.Types
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
41 import Gargantext.Database.Types.Node (CorpusId)
42 import Gargantext.Text.Terms (TermType(..))
43 import Gargantext.Prelude
44 import Gargantext.API.Orchestrator.Types
45 import Servant
46 -- import Servant.Job.Server
47 import Test.QuickCheck (elements)
48 import Test.QuickCheck.Arbitrary
49 import Gargantext.Core (Lang(..))
50 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
51 import qualified Gargantext.Text.Corpus.API as API
52 import Gargantext.Database.Types.Node (UserId)
53 import Gargantext.API.Corpus.New.File
54
55 data Query = Query { query_query :: Text
56 , query_corpus_id :: Int
57 , query_databases :: [API.ExternalAPIs]
58 }
59 deriving (Eq, Show, Generic)
60
61 deriveJSON (unPrefix "query_") 'Query
62
63
64 instance Arbitrary Query where
65 arbitrary = elements [ Query q n fs
66 | q <- ["a","b"]
67 , n <- [0..10]
68 , fs <- take 3 $ repeat API.externalAPIs
69 ]
70
71 instance ToSchema Query where
72 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
73
74 type Api = Summary "New Corpus endpoint"
75 :> ReqBody '[JSON] Query
76 :> Post '[JSON] CorpusId
77 :<|> Get '[JSON] ApiInfo
78
79 -- | TODO manage several apis
80 -- TODO-ACCESS
81 -- TODO this is only the POST
82 api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
83 api _uId (Query q _ as) = do
84 cId <- case head as of
85 Nothing -> flowCorpusSearchInDatabase "user1" EN q
86 Just API.All -> flowCorpusSearchInDatabase "user1" EN q
87 Just a -> do
88 docs <- liftIO $ API.get a q (Just 1000)
89 cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
90 pure cId'
91
92 pure cId
93
94 ------------------------------------------------
95 data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
96 deriving (Generic)
97 instance Arbitrary ApiInfo where
98 arbitrary = ApiInfo <$> arbitrary
99
100 deriveJSON (unPrefix "") 'ApiInfo
101
102 instance ToSchema ApiInfo
103
104 info :: FlowCmdM env err m => UserId -> m ApiInfo
105 info _u = pure $ ApiInfo API.externalAPIs
106
107 {-
108 -- Proposal to replace the Query type which seems to generically named.
109 data ScraperInput = ScraperInput
110 { _scin_query :: !Text
111 , _scin_corpus_id :: !Int
112 , _scin_databases :: [API.ExternalAPIs]
113 }
114 deriving (Eq, Show, Generic)
115
116 makeLenses ''ScraperInput
117
118 deriveJSON (unPrefix "_scin_") 'ScraperInput
119
120 data ScraperEvent = ScraperEvent
121 { _scev_message :: !(Maybe Text)
122 , _scev_level :: !(Maybe Text)
123 , _scev_date :: !(Maybe Text)
124 }
125 deriving Generic
126
127 deriveJSON (unPrefix "_scev_") 'ScraperEvent
128
129 data ScraperStatus = ScraperStatus
130 { _scst_succeeded :: !(Maybe Int)
131 , _scst_failed :: !(Maybe Int)
132 , _scst_remaining :: !(Maybe Int)
133 , _scst_events :: !(Maybe [ScraperEvent])
134 }
135 deriving Generic
136
137 deriveJSON (unPrefix "_scst_") 'ScraperStatus
138 -}
139
140
141
142 ------------------------------------------------------------------------
143 ------------------------------------------------------------------------
144 data WithQuery = WithQuery
145 { _wq_query :: !Text
146 , _wq_databases :: ![ExternalAPIs]
147 }
148 deriving Generic
149
150 makeLenses ''WithQuery
151
152 instance FromJSON WithQuery where
153 parseJSON = genericParseJSON $ jsonOptions "_wq_"
154
155 instance ToSchema WithQuery
156 -------------------------------------------------------
157 data WithForm = WithForm
158 { _wf_filetype :: !FileType
159 , _wf_data :: !Text
160 } deriving (Eq, Show, Generic)
161
162 makeLenses ''WithForm
163
164 instance FromJSON WithForm where
165 parseJSON = genericParseJSON $ jsonOptions "_wf_"
166 instance ToSchema WithForm
167 instance FromForm WithForm
168
169
170 ------------------------------------------------------------------------
171 type
172 AddAPI withInput = AsyncJobsAPI ScraperStatus withInput ScraperStatus
173 ------------------------------------------------------------------------
174
175 type AddWithQuery = Summary "Add to corpus endpoint"
176 :> "corpus"
177 :> Capture "corpus_id" CorpusId
178 :> "add"
179 :> "query"
180 :> "async"
181 :> AddAPI WithQuery
182
183 type AddWithFile = Summary "Add to corpus endpoint"
184 :> "corpus"
185 :> Capture "corpus_id" CorpusId
186 :> "add"
187 :> "file"
188 :> MultipartForm Mem (MultipartData Mem)
189 :> QueryParam "fileType" FileType
190 :> "async"
191 :> AddAPI ()
192
193 type AddWithForm = Summary "Add to corpus endpoint"
194 :> "corpus"
195 :> Capture "corpus_id" CorpusId
196 :> "add"
197 :> "form"
198 :> ReqBody '[FormUrlEncoded] WithForm
199 :> "async"
200 :> AddAPI ()
201
202 ------------------------------------------------------------------------
203 -- TODO WithQuery also has a corpus id
204 addToCorpusJobFunction :: FlowCmdM env err m
205 => CorpusId
206 -> WithQuery
207 -> (ScraperStatus -> m ())
208 -> m ScraperStatus
209 addToCorpusJobFunction _cid _input logStatus = do
210 -- TODO ...
211 logStatus ScraperStatus { _scst_succeeded = Just 10
212 , _scst_failed = Just 2
213 , _scst_remaining = Just 138
214 , _scst_events = Just []
215 }
216 -- TODO ...
217 pure ScraperStatus { _scst_succeeded = Just 137
218 , _scst_failed = Just 13
219 , _scst_remaining = Just 0
220 , _scst_events = Just []
221 }
222
223
224 addToCorpusWithFile :: FlowCmdM env err m
225 => CorpusId
226 -> MultipartData Mem
227 -> Maybe FileType
228 -> (ScraperStatus -> m ())
229 -> m ScraperStatus
230 addToCorpusWithFile cid input filetype logStatus = do
231 logStatus ScraperStatus { _scst_succeeded = Just 10
232 , _scst_failed = Just 2
233 , _scst_remaining = Just 138
234 , _scst_events = Just []
235 }
236 _h <- postUpload cid filetype input
237
238 pure ScraperStatus { _scst_succeeded = Just 137
239 , _scst_failed = Just 13
240 , _scst_remaining = Just 0
241 , _scst_events = Just []
242 }
243
244 addToCorpusWithForm :: FlowCmdM env err m
245 => CorpusId
246 -> WithForm
247 -> (ScraperStatus -> m ())
248 -> m ScraperStatus
249 addToCorpusWithForm _cid (WithForm ft d) logStatus = do
250 logStatus ScraperStatus { _scst_succeeded = Just 10
251 , _scst_failed = Just 2
252 , _scst_remaining = Just 138
253 , _scst_events = Just []
254 }
255 _ <- putStrLn $ show ft
256 _ <- putStrLn $ show d
257
258 pure ScraperStatus { _scst_succeeded = Just 137
259 , _scst_failed = Just 13
260 , _scst_remaining = Just 0
261 , _scst_events = Just []
262 }
263