]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Corpus/New.hs
[API] route for upload WIP.
[gargantext.git] / src / Gargantext / API / Corpus / New.hs
1 {-|
2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 New corpus means either:
11 - new corpus
12 - new data in existing corpus
13 -}
14
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
23
24 module Gargantext.API.Corpus.New
25 where
26
27 --import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat(..))
28 import Control.Lens hiding (elements)
29 import Control.Monad.IO.Class (liftIO)
30 import Data.Aeson
31 import Data.Aeson.TH (deriveJSON)
32 import Data.Either
33 import Data.Swagger
34 import Data.Text (Text)
35 import GHC.Generics (Generic)
36 import Gargantext.API.Corpus.New.File
37 import Gargantext.API.Orchestrator.Types
38 import Gargantext.Core (Lang(..))
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
41 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
42 import Gargantext.Database.Types.Node (CorpusId)
43 import Gargantext.Database.Types.Node (ToHyperdataDocument(..))
44 import Gargantext.Database.Types.Node (UserId)
45 import Gargantext.Prelude
46 import Gargantext.Text.Corpus.Parsers.CSV (parseHal')
47 import Gargantext.Text.Terms (TermType(..))
48 import Servant
49 import Servant.API.Flatten (Flat)
50 import Servant.Job.Core
51 import Servant.Job.Types
52 import Servant.Job.Utils (jsonOptions)
53 import Servant.Multipart
54 import Test.QuickCheck (elements)
55 import Test.QuickCheck.Arbitrary
56 import Web.FormUrlEncoded (FromForm)
57 import qualified Gargantext.Text.Corpus.API as API
58
59 data Query = Query { query_query :: Text
60 , query_corpus_id :: Int
61 , query_databases :: [API.ExternalAPIs]
62 }
63 deriving (Eq, Show, Generic)
64
65 deriveJSON (unPrefix "query_") 'Query
66
67
68 instance Arbitrary Query where
69 arbitrary = elements [ Query q n fs
70 | q <- ["a","b"]
71 , n <- [0..10]
72 , fs <- take 3 $ repeat API.externalAPIs
73 ]
74
75 instance ToSchema Query where
76 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
77
78 type Api = Summary "New Corpus endpoint"
79 :> ReqBody '[JSON] Query
80 :> Post '[JSON] CorpusId
81 :<|> Get '[JSON] ApiInfo
82
83 -- | TODO manage several apis
84 -- TODO-ACCESS
85 -- TODO this is only the POST
86 api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
87 api _uId (Query q _ as) = do
88 cId <- case head as of
89 Nothing -> flowCorpusSearchInDatabase "user1" EN q
90 Just API.All -> flowCorpusSearchInDatabase "user1" EN q
91 Just a -> do
92 docs <- liftIO $ API.get a q (Just 1000)
93 cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
94 pure cId'
95
96 pure cId
97
98 ------------------------------------------------
99 data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
100 deriving (Generic)
101 instance Arbitrary ApiInfo where
102 arbitrary = ApiInfo <$> arbitrary
103
104 deriveJSON (unPrefix "") 'ApiInfo
105
106 instance ToSchema ApiInfo
107
108 info :: FlowCmdM env err m => UserId -> m ApiInfo
109 info _u = pure $ ApiInfo API.externalAPIs
110
111 ------------------------------------------------------------------------
112 ------------------------------------------------------------------------
113 data WithQuery = WithQuery
114 { _wq_query :: !Text
115 , _wq_databases :: ![ExternalAPIs]
116 }
117 deriving Generic
118
119 makeLenses ''WithQuery
120 instance FromJSON WithQuery where
121 parseJSON = genericParseJSON $ jsonOptions "_wq_"
122 instance ToSchema WithQuery where
123 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wq_")
124
125 -------------------------------------------------------
126 data WithForm = WithForm
127 { _wf_filetype :: !FileType
128 , _wf_data :: !Text
129 } deriving (Eq, Show, Generic)
130
131 makeLenses ''WithForm
132 instance FromForm WithForm
133 instance FromJSON WithForm where
134 parseJSON = genericParseJSON $ jsonOptions "_wf_"
135 instance ToSchema WithForm where
136 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
137
138 ------------------------------------------------------------------------
139 type AsyncJobs event ctI input output =
140 Flat (AsyncJobsAPI' 'Unsafe 'Safe ctI '[JSON] Maybe event input output)
141 ------------------------------------------------------------------------
142
143 type Upload = Summary "Corpus Upload endpoint"
144 :> "corpus"
145 :> Capture "corpus_id" CorpusId
146 :<|> "addWithquery" :> AsyncJobsAPI ScraperStatus WithQuery ScraperStatus
147 :<|> "addWithfile" :> AsyncJobs ScraperStatus '[FormUrlEncoded] WithForm ScraperStatus
148
149
150 type AddWithQuery = Summary "Add with Query to corpus endpoint"
151 :> "corpus"
152 :> Capture "corpus_id" CorpusId
153 :> "add"
154 :> "query"
155 :> "async"
156 :> AsyncJobsAPI ScraperStatus WithQuery ScraperStatus
157
158 type AddWithFile = Summary "Add with MultipartData to corpus endpoint"
159 :> "corpus"
160 :> Capture "corpus_id" CorpusId
161 :> "add"
162 :> "file"
163 :> MultipartForm Mem (MultipartData Mem)
164 :> QueryParam "fileType" FileType
165 :> "async"
166 :> AsyncJobs ScraperStatus '[JSON] () ScraperStatus
167
168 type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
169 :> "corpus"
170 :> Capture "corpus_id" CorpusId
171 :> "add"
172 :> "form"
173 :> "async"
174 :> AsyncJobs ScraperStatus '[FormUrlEncoded] WithForm ScraperStatus
175
176 ------------------------------------------------------------------------
177 -- TODO WithQuery also has a corpus id
178 addToCorpusJobFunction :: FlowCmdM env err m
179 => CorpusId
180 -> WithQuery
181 -> (ScraperStatus -> m ())
182 -> m ScraperStatus
183 addToCorpusJobFunction _cid (WithQuery _q _dbs) logStatus = do
184 -- TODO ...
185 logStatus ScraperStatus { _scst_succeeded = Just 10
186 , _scst_failed = Just 2
187 , _scst_remaining = Just 138
188 , _scst_events = Just []
189 }
190 -- TODO ...
191 pure ScraperStatus { _scst_succeeded = Just 137
192 , _scst_failed = Just 13
193 , _scst_remaining = Just 0
194 , _scst_events = Just []
195 }
196
197
198 addToCorpusWithFile :: FlowCmdM env err m
199 => CorpusId
200 -> MultipartData Mem
201 -> Maybe FileType
202 -> (ScraperStatus -> m ())
203 -> m ScraperStatus
204 addToCorpusWithFile cid input filetype logStatus = do
205 logStatus ScraperStatus { _scst_succeeded = Just 10
206 , _scst_failed = Just 2
207 , _scst_remaining = Just 138
208 , _scst_events = Just []
209 }
210 _h <- postUpload cid filetype input
211
212 pure ScraperStatus { _scst_succeeded = Just 137
213 , _scst_failed = Just 13
214 , _scst_remaining = Just 0
215 , _scst_events = Just []
216 }
217
218 addToCorpusWithForm :: FlowCmdM env err m
219 => CorpusId
220 -> WithForm
221 -> (ScraperStatus -> m ())
222 -> m ScraperStatus
223 addToCorpusWithForm cid (WithForm _ft d) logStatus = do
224
225 let docs = splitEvery 500
226 $ take 10000
227 $ parseHal' (cs d)
228
229 logStatus ScraperStatus { _scst_succeeded = Just 1
230 , _scst_failed = Just 0
231 , _scst_remaining = Just 1
232 , _scst_events = Just []
233 }
234
235 cid' <- flowCorpus "user1" (Right [cid]) (Multi EN) (map (map toHyperdataDocument) docs)
236 printDebug "cid'" cid'
237
238 pure ScraperStatus { _scst_succeeded = Just 2
239 , _scst_failed = Just 0
240 , _scst_remaining = Just 0
241 , _scst_events = Just []
242 }
243