]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Corpus/New.hs
[API] upload
[gargantext.git] / src / Gargantext / API / Corpus / New.hs
1 {-|
2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 New corpus means either:
11 - new corpus
12 - new data in existing corpus
13 -}
14
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
23
24 module Gargantext.API.Corpus.New
25 where
26
27 import Data.Either
28 import Control.Monad.IO.Class (liftIO)
29 import Data.Aeson.TH (deriveJSON)
30 import Data.Aeson
31 import Servant.Job.Utils (jsonOptions)
32 import Control.Lens hiding (elements)
33 import Servant.Multipart
34 import Data.Swagger
35 import Data.Text (Text)
36 import GHC.Generics (Generic)
37 import Servant.Job.Types
38 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
39 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
40 import Gargantext.Database.Types.Node (CorpusId)
41 import Gargantext.Text.Terms (TermType(..))
42 import Gargantext.Prelude
43 import Gargantext.API.Orchestrator.Types
44 import Servant
45 -- import Servant.Job.Server
46 import Test.QuickCheck (elements)
47 import Test.QuickCheck.Arbitrary
48 import Gargantext.Core (Lang(..))
49 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
50 import qualified Gargantext.Text.Corpus.API as API
51 import Gargantext.Database.Types.Node (UserId)
52 import Gargantext.API.Corpus.New.File
53
54 data Query = Query { query_query :: Text
55 , query_corpus_id :: Int
56 , query_databases :: [API.ExternalAPIs]
57 }
58 deriving (Eq, Show, Generic)
59
60 deriveJSON (unPrefix "query_") 'Query
61
62
63 instance Arbitrary Query where
64 arbitrary = elements [ Query q n fs
65 | q <- ["a","b"]
66 , n <- [0..10]
67 , fs <- take 3 $ repeat API.externalAPIs
68 ]
69
70 instance ToSchema Query where
71 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
72
73 type Api = Summary "New Corpus endpoint"
74 :> ReqBody '[JSON] Query
75 :> Post '[JSON] CorpusId
76 :<|> Get '[JSON] ApiInfo
77
78 -- | TODO manage several apis
79 -- TODO-ACCESS
80 -- TODO this is only the POST
81 api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
82 api _uId (Query q _ as) = do
83 cId <- case head as of
84 Nothing -> flowCorpusSearchInDatabase "user1" EN q
85 Just API.All -> flowCorpusSearchInDatabase "user1" EN q
86 Just a -> do
87 docs <- liftIO $ API.get a q (Just 1000)
88 cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
89 pure cId'
90
91 pure cId
92
93 ------------------------------------------------
94 data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
95 deriving (Generic)
96 instance Arbitrary ApiInfo where
97 arbitrary = ApiInfo <$> arbitrary
98
99 deriveJSON (unPrefix "") 'ApiInfo
100
101 instance ToSchema ApiInfo
102
103 info :: FlowCmdM env err m => UserId -> m ApiInfo
104 info _u = pure $ ApiInfo API.externalAPIs
105
106 {-
107 -- Proposal to replace the Query type which seems to generically named.
108 data ScraperInput = ScraperInput
109 { _scin_query :: !Text
110 , _scin_corpus_id :: !Int
111 , _scin_databases :: [API.ExternalAPIs]
112 }
113 deriving (Eq, Show, Generic)
114
115 makeLenses ''ScraperInput
116
117 deriveJSON (unPrefix "_scin_") 'ScraperInput
118
119 data ScraperEvent = ScraperEvent
120 { _scev_message :: !(Maybe Text)
121 , _scev_level :: !(Maybe Text)
122 , _scev_date :: !(Maybe Text)
123 }
124 deriving Generic
125
126 deriveJSON (unPrefix "_scev_") 'ScraperEvent
127
128 data ScraperStatus = ScraperStatus
129 { _scst_succeeded :: !(Maybe Int)
130 , _scst_failed :: !(Maybe Int)
131 , _scst_remaining :: !(Maybe Int)
132 , _scst_events :: !(Maybe [ScraperEvent])
133 }
134 deriving Generic
135
136 deriveJSON (unPrefix "_scst_") 'ScraperStatus
137 -}
138
139
140
141 ------------------------------------------------------------------------
142 ------------------------------------------------------------------------
143 data WithQuery = WithQuery
144 { _wq_query :: !Text
145 , _wq_databases :: ![ExternalAPIs]
146 }
147 deriving Generic
148
149 makeLenses ''WithQuery
150
151 instance FromJSON WithQuery where
152 parseJSON = genericParseJSON $ jsonOptions "_wq_"
153
154 instance ToSchema WithQuery
155
156 data WithForm = WithForm
157 { _wf_filetype :: !FileType
158 , _wf_data :: !Text
159 } deriving Generic
160
161 makeLenses ''WithForm
162
163 instance FromJSON WithForm where
164 parseJSON = genericParseJSON $ jsonOptions "_wf_"
165
166 instance ToSchema WithForm
167
168
169 ------------------------------------------------------------------------
170 type
171 AddAPI withInput = AsyncJobsAPI ScraperStatus withInput ScraperStatus
172 ------------------------------------------------------------------------
173
174 type AddWithQuery = Summary "Add to corpus endpoint"
175 :> "corpus"
176 :> Capture "corpus_id" CorpusId
177 :> "add"
178 :> "query"
179 :> "async"
180 :> AddAPI WithQuery
181
182 type AddWithFile = Summary "Add to corpus endpoint"
183 :> "corpus"
184 :> Capture "corpus_id" CorpusId
185 :> "add"
186 :> "file"
187 :> MultipartForm Mem (MultipartData Mem)
188 :> QueryParam "fileType" FileType
189 :> "async"
190 :> AddAPI ()
191
192 type AddWithForm = Summary "Add to corpus endpoint"
193 :> "corpus"
194 :> Capture "corpus_id" CorpusId
195 :> "add"
196 :> "form"
197 :> "async"
198 :> AddAPI WithForm
199
200
201 ------------------------------------------------------------------------
202 -- TODO WithQuery also has a corpus id
203 addToCorpusJobFunction :: FlowCmdM env err m
204 => CorpusId
205 -> WithQuery
206 -> (ScraperStatus -> m ())
207 -> m ScraperStatus
208 addToCorpusJobFunction _cid _input logStatus = do
209 -- TODO ...
210 logStatus ScraperStatus { _scst_succeeded = Just 10
211 , _scst_failed = Just 2
212 , _scst_remaining = Just 138
213 , _scst_events = Just []
214 }
215 -- TODO ...
216 pure ScraperStatus { _scst_succeeded = Just 137
217 , _scst_failed = Just 13
218 , _scst_remaining = Just 0
219 , _scst_events = Just []
220 }
221
222
223 addToCorpusWithFile :: FlowCmdM env err m
224 => CorpusId
225 -> MultipartData Mem
226 -> Maybe FileType
227 -> (ScraperStatus -> m ())
228 -> m ScraperStatus
229 addToCorpusWithFile cid input filetype logStatus = do
230 logStatus ScraperStatus { _scst_succeeded = Just 10
231 , _scst_failed = Just 2
232 , _scst_remaining = Just 138
233 , _scst_events = Just []
234 }
235 _h <- postUpload cid filetype input
236
237 pure ScraperStatus { _scst_succeeded = Just 137
238 , _scst_failed = Just 13
239 , _scst_remaining = Just 0
240 , _scst_events = Just []
241 }
242
243 addToCorpusWithForm :: FlowCmdM env err m
244 => CorpusId
245 -> WithForm
246 -> (ScraperStatus -> m ())
247 -> m ScraperStatus
248 addToCorpusWithForm cid (WithForm ft d) logStatus = do
249 logStatus ScraperStatus { _scst_succeeded = Just 10
250 , _scst_failed = Just 2
251 , _scst_remaining = Just 138
252 , _scst_events = Just []
253 }
254 _h <- putStrLn $ show ft
255
256 pure ScraperStatus { _scst_succeeded = Just 137
257 , _scst_failed = Just 13
258 , _scst_remaining = Just 0
259 , _scst_events = Just []
260 }
261