]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Corpus/New.hs
[FIX][API] upload url encoded OK
[gargantext.git] / src / Gargantext / API / Corpus / New.hs
1 {-|
2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 New corpus means either:
11 - new corpus
12 - new data in existing corpus
13 -}
14
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
23
24 module Gargantext.API.Corpus.New
25 where
26
27 --import Debug.Trace (trace)
28 import Web.FormUrlEncoded (FromForm)
29 import Data.Either
30 import Control.Monad.IO.Class (liftIO)
31 import Data.Aeson.TH (deriveJSON)
32 import Data.Aeson
33 import Servant.Job.Utils (jsonOptions)
34 import Control.Lens hiding (elements)
35 import Servant.Multipart
36 import Data.Swagger
37 import Data.Text (Text)
38 import GHC.Generics (Generic)
39 import Servant.Job.Types
40 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
41 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
42 import Gargantext.Database.Types.Node (CorpusId)
43 import Gargantext.Text.Terms (TermType(..))
44 import Gargantext.Prelude
45 import Gargantext.API.Orchestrator.Types
46 import Servant
47 -- import Servant.Job.Server
48 import Test.QuickCheck (elements)
49 import Test.QuickCheck.Arbitrary
50 import Gargantext.Core (Lang(..))
51 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
52 import qualified Gargantext.Text.Corpus.API as API
53 import Gargantext.Database.Types.Node (UserId)
54 import Gargantext.API.Corpus.New.File
55
-- | JSON request body of the POST route of 'Api'.
data Query = Query
  { query_query     :: Text
    -- ^ Query text; 'api' forwards it to the search / flow functions.
  , query_corpus_id :: Int
    -- ^ Corpus identifier. NOTE(review): 'api' in this module ignores it.
  , query_databases :: [API.ExternalAPIs]
    -- ^ Requested external APIs; 'api' only looks at the first entry.
  }
  deriving (Eq, Show, Generic)

-- JSON instances are generated by Template Haskell.
-- NOTE(review): 'unPrefix' presumably strips the "query_" prefix from the
-- JSON keys — confirm in Gargantext.Core.Utils.Prefix.
deriveJSON (unPrefix "query_") 'Query
63
64
-- | Picks one 'Query' out of a fixed, enumerated set of sample values.
instance Arbitrary Query where
  arbitrary = elements $ do
    -- List monad: enumerates every combination, in the same order as the
    -- equivalent list comprehension.
    q  <- ["a","b"]
    n  <- [0..10]
    fs <- take 3 $ repeat API.externalAPIs
    pure (Query q n fs)
71
-- | Swagger schema for 'Query', derived generically with the same "query_"
-- prefix handling as its JSON instances.
instance ToSchema Query where
  declareNamedSchema =
    genericDeclareNamedSchema (unPrefixSwagger "query_")
74
-- | Routes of the "new corpus" API:
--
--   * POST with a JSON-encoded 'Query' body, answering with a 'CorpusId';
--   * GET answering with an 'ApiInfo'.
type Api =
       Summary "New Corpus endpoint"
    :> ReqBody '[JSON] Query
    :> Post '[JSON] CorpusId
  :<|> Get '[JSON] ApiInfo
79
-- | Handler for the POST route: build a corpus from a 'Query'.
--
-- Only the first entry of the query's database list is inspected
-- (@head@ is matched against @Nothing@ / @Just@ here, so an empty list is
-- handled rather than crashing):
--
--   * no entry, or 'API.All': delegate to 'flowCorpusSearchInDatabase';
--   * any other external API: fetch documents with 'API.get' and hand them
--     to 'flowCorpus'.
--
-- NOTE(review): the 'UserId' argument and the query's corpus id are ignored;
-- the user name @"user1"@ and the language 'EN' are hard-coded.
--
-- TODO manage several apis
-- TODO-ACCESS
-- TODO this is only the POST
api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
api _uId (Query q _ as) =
  case head as of
    Nothing      -> flowCorpusSearchInDatabase "user1" EN q
    Just API.All -> flowCorpusSearchInDatabase "user1" EN q
    Just a       -> do
      -- NOTE(review): @Just 1000@ is presumably a result limit — confirm
      -- against 'API.get'.
      docs <- liftIO $ API.get a q (Just 1000)
      flowCorpus "user1" (Left q) (Multi EN) [docs]
94
95 ------------------------------------------------
-- | Response body of the GET route of 'Api': a list of external APIs.
data ApiInfo = ApiInfo
  { api_info :: [API.ExternalAPIs]
  }
  deriving (Generic)

-- | Wraps an arbitrary list of external APIs.
instance Arbitrary ApiInfo where
  arbitrary = ApiInfo <$> arbitrary

-- JSON instances generated by Template Haskell, with an empty prefix.
deriveJSON (unPrefix "") 'ApiInfo

-- | Swagger schema using the class's default (generic) implementation.
instance ToSchema ApiInfo
104
-- | Handler returning 'API.externalAPIs' wrapped in an 'ApiInfo'.
-- The user id argument is not used.
info :: FlowCmdM env err m => UserId -> m ApiInfo
info _ = pure (ApiInfo API.externalAPIs)
107
108 {-
109 -- Proposal to replace the Query type, which seems too generically named.
110 data ScraperInput = ScraperInput
111 { _scin_query :: !Text
112 , _scin_corpus_id :: !Int
113 , _scin_databases :: [API.ExternalAPIs]
114 }
115 deriving (Eq, Show, Generic)
116
117 makeLenses ''ScraperInput
118
119 deriveJSON (unPrefix "_scin_") 'ScraperInput
120
121 data ScraperEvent = ScraperEvent
122 { _scev_message :: !(Maybe Text)
123 , _scev_level :: !(Maybe Text)
124 , _scev_date :: !(Maybe Text)
125 }
126 deriving Generic
127
128 deriveJSON (unPrefix "_scev_") 'ScraperEvent
129
130 data ScraperStatus = ScraperStatus
131 { _scst_succeeded :: !(Maybe Int)
132 , _scst_failed :: !(Maybe Int)
133 , _scst_remaining :: !(Maybe Int)
134 , _scst_events :: !(Maybe [ScraperEvent])
135 }
136 deriving Generic
137
138 deriveJSON (unPrefix "_scst_") 'ScraperStatus
139 -}
140
141
142
143 ------------------------------------------------------------------------
144 ------------------------------------------------------------------------
-- | Input type of the 'AddWithQuery' asynchronous job endpoint.
data WithQuery = WithQuery
  { _wq_query     :: !Text
    -- ^ Query text.
  , _wq_databases :: ![ExternalAPIs]
    -- ^ External APIs named in the request.
  }
  deriving Generic

-- Generates lenses for the fields above.
makeLenses ''WithQuery

-- | Generic JSON decoding configured by @jsonOptions "_wq_"@.
instance FromJSON WithQuery where
  parseJSON = genericParseJSON (jsonOptions "_wq_")

-- | Swagger schema using the class's default (generic) implementation.
instance ToSchema WithQuery
157 -------------------------------------------------------
-- | Request body of the 'AddWithForm' endpoint (sent form-url-encoded).
data WithForm = WithForm
  { _wf_filetype :: !FileType
    -- ^ Declared type of the submitted data.
  , _wf_data     :: !Text
    -- ^ The submitted data itself, as text.
  }
  deriving (Eq, Show, Generic)

-- Generates lenses for the fields above.
makeLenses ''WithForm

-- | Generic JSON decoding configured by @jsonOptions "_wf_"@.
instance FromJSON WithForm where
  parseJSON = genericParseJSON (jsonOptions "_wf_")

-- | Swagger schema using the class's default (generic) implementation.
instance ToSchema WithForm

-- | Form decoding using the class's default (generic) implementation.
instance FromForm WithForm
169
170
171 ------------------------------------------------------------------------
-- | Asynchronous-job API shared by the "add to corpus" endpoints: the job
-- takes a @withInput@ and uses 'ScraperStatus' both for its intermediate
-- events and for its final result.
type AddAPI withInput = AsyncJobsAPI ScraperStatus withInput ScraperStatus
174 ------------------------------------------------------------------------
175
-- | Route @corpus/<corpus_id>/add/query/async@: asynchronous job whose input
-- is a 'WithQuery'.
type AddWithQuery =
     Summary "Add with Query to corpus endpoint"
  :> "corpus" :> Capture "corpus_id" CorpusId
  :> "add" :> "query" :> "async"
  :> AddAPI WithQuery
183
-- | Route @corpus/<corpus_id>/add/file/async@: asynchronous job fed by a
-- multipart form body, with an optional @fileType@ query parameter.
-- The job itself takes no input (@()@).
--
-- The order of the combinators is kept as is: it determines the order of the
-- handler's arguments (corpus id, multipart data, file type).
type AddWithFile =
     Summary "Add with MultipartData to corpus endpoint"
  :> "corpus" :> Capture "corpus_id" CorpusId
  :> "add" :> "file"
  :> MultipartForm Mem (MultipartData Mem)
  :> QueryParam "fileType" FileType
  :> "async"
  :> AddAPI ()
193
-- | Route @corpus/<corpus_id>/add/form/async@: asynchronous job fed by a
-- form-url-encoded 'WithForm' request body. The job itself takes no input
-- (@()@).
type AddWithForm =
     Summary "Add with FormUrlEncoded to corpus endpoint"
  :> "corpus" :> Capture "corpus_id" CorpusId
  :> "add" :> "form"
  :> ReqBody '[FormUrlEncoded] WithForm
  :> "async"
  :> AddAPI ()
202
203 ------------------------------------------------------------------------
-- | Job body for the "add with query" endpoint.
--
-- Currently a stub: the corpus id and the input are ignored. It reports one
-- hard-coded intermediate status through the logging callback, then returns
-- a hard-coded final status.
--
-- TODO WithQuery also has a corpus id
addToCorpusJobFunction
  :: FlowCmdM env err m
  => CorpusId
  -> WithQuery
  -> (ScraperStatus -> m ())
  -> m ScraperStatus
addToCorpusJobFunction _cid _input logStatus = do
  -- TODO ...
  logStatus inProgress
  -- TODO ...
  pure finished
  where
    -- Placeholder status reported while the job is running.
    inProgress = ScraperStatus
      { _scst_succeeded = Just 10
      , _scst_failed    = Just 2
      , _scst_remaining = Just 138
      , _scst_events    = Just []
      }
    -- Placeholder status returned as the job's result.
    finished = ScraperStatus
      { _scst_succeeded = Just 137
      , _scst_failed    = Just 13
      , _scst_remaining = Just 0
      , _scst_events    = Just []
      }
223
224
-- | Job body for the "add with file" endpoint.
--
-- Reports a hard-coded intermediate status through the logging callback,
-- passes the corpus id, optional file type and multipart data to
-- 'postUpload' (whose result is discarded), then returns a hard-coded final
-- status.
addToCorpusWithFile
  :: FlowCmdM env err m
  => CorpusId
  -> MultipartData Mem
  -> Maybe FileType
  -> (ScraperStatus -> m ())
  -> m ScraperStatus
addToCorpusWithFile cid input filetype logStatus = do
  logStatus inProgress
  _ <- postUpload cid filetype input
  pure finished
  where
    -- Placeholder status reported before the upload is processed.
    inProgress = ScraperStatus
      { _scst_succeeded = Just 10
      , _scst_failed    = Just 2
      , _scst_remaining = Just 138
      , _scst_events    = Just []
      }
    -- Placeholder status returned as the job's result.
    finished = ScraperStatus
      { _scst_succeeded = Just 137
      , _scst_failed    = Just 13
      , _scst_remaining = Just 0
      , _scst_events    = Just []
      }
244
-- | Job body for the "add with form" endpoint.
--
-- Currently a stub: the corpus id is ignored. It prints the submitted file
-- type and data for debugging, reports a hard-coded intermediate status
-- through the logging callback, then returns a hard-coded final status.
--
-- NOTE(review): the whole submitted payload is written out by the debug
-- print below — confirm this is acceptable outside development.
addToCorpusWithForm
  :: FlowCmdM env err m
  => CorpusId
  -> WithForm
  -> (ScraperStatus -> m ())
  -> m ScraperStatus
addToCorpusWithForm _cid (WithForm formFileType formData) logStatus = do
  printDebug "filetype" formFileType
  putStrLn ("data" <> show formData)
  logStatus inProgress
  pure finished
  where
    -- Placeholder status reported while the job is running.
    inProgress = ScraperStatus
      { _scst_succeeded = Just 10
      , _scst_failed    = Just 2
      , _scst_remaining = Just 138
      , _scst_events    = Just []
      }
    -- Placeholder status returned as the job's result.
    finished = ScraperStatus
      { _scst_succeeded = Just 137
      , _scst_failed    = Just 13
      , _scst_remaining = Just 0
      , _scst_events    = Just []
      }
264