]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Corpus/New.hs
Merge branch 'dev-phylo' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell-gargantex...
[gargantext.git] / src / Gargantext / API / Corpus / New.hs
1 {-|
2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 New corpus means either:
11 - new corpus
12 - new data in existing corpus
13 -}
14
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
23
24 module Gargantext.API.Corpus.New
25 where
26
27 import Data.Either
28 import Control.Monad.IO.Class (liftIO)
29 import Data.Aeson.TH (deriveJSON)
30 import Data.Aeson
31 import Servant.Job.Utils (jsonOptions)
32 import Control.Lens hiding (elements)
33 import Servant.Multipart
34 import Data.Swagger
35 import Data.Text (Text)
36 import GHC.Generics (Generic)
37 import Servant.Job.Types
38 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
39 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
40 import Gargantext.Database.Types.Node (CorpusId)
41 import Gargantext.Text.Terms (TermType(..))
42 import Gargantext.Prelude
43 import Gargantext.API.Orchestrator.Types
44 import Servant
45 -- import Servant.Job.Server
46 import Test.QuickCheck (elements)
47 import Test.QuickCheck.Arbitrary
48 import Gargantext.Core (Lang(..))
49 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
50 import qualified Gargantext.Text.Corpus.API as API
51 import Gargantext.Database.Types.Node (UserId)
52 import Gargantext.API.Corpus.New.File
53
54 data Query = Query { query_query :: Text
55 , query_corpus_id :: Int
56 , query_databases :: [API.ExternalAPIs]
57 }
58 deriving (Eq, Show, Generic)
59
60 deriveJSON (unPrefix "query_") 'Query
61
62
63 instance Arbitrary Query where
64 arbitrary = elements [ Query q n fs
65 | q <- ["a","b"]
66 , n <- [0..10]
67 , fs <- take 3 $ repeat API.externalAPIs
68 ]
69
70 instance ToSchema Query where
71 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
72
73 type Api = Summary "New Corpus endpoint"
74 :> ReqBody '[JSON] Query
75 :> Post '[JSON] CorpusId
76 :<|> Get '[JSON] ApiInfo
77
78 -- | TODO manage several apis
79 -- TODO-ACCESS
80 -- TODO this is only the POST
81 api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
82 api _uId (Query q _ as) = do
83 cId <- case head as of
84 Nothing -> flowCorpusSearchInDatabase "user1" EN q
85 Just API.All -> flowCorpusSearchInDatabase "user1" EN q
86 Just a -> do
87 docs <- liftIO $ API.get a q (Just 1000)
88 cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
89 pure cId'
90
91 pure cId
92
93 ------------------------------------------------
94 data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
95 deriving (Generic)
96 instance Arbitrary ApiInfo where
97 arbitrary = ApiInfo <$> arbitrary
98
99 deriveJSON (unPrefix "") 'ApiInfo
100
101 instance ToSchema ApiInfo
102
103 info :: FlowCmdM env err m => UserId -> m ApiInfo
104 info _u = pure $ ApiInfo API.externalAPIs
105
106 {-
107 -- Proposal to replace the Query type which seems to generically named.
108 data ScraperInput = ScraperInput
109 { _scin_query :: !Text
110 , _scin_corpus_id :: !Int
111 , _scin_databases :: [API.ExternalAPIs]
112 }
113 deriving (Eq, Show, Generic)
114
115 makeLenses ''ScraperInput
116
117 deriveJSON (unPrefix "_scin_") 'ScraperInput
118
119 data ScraperEvent = ScraperEvent
120 { _scev_message :: !(Maybe Text)
121 , _scev_level :: !(Maybe Text)
122 , _scev_date :: !(Maybe Text)
123 }
124 deriving Generic
125
126 deriveJSON (unPrefix "_scev_") 'ScraperEvent
127
128 data ScraperStatus = ScraperStatus
129 { _scst_succeeded :: !(Maybe Int)
130 , _scst_failed :: !(Maybe Int)
131 , _scst_remaining :: !(Maybe Int)
132 , _scst_events :: !(Maybe [ScraperEvent])
133 }
134 deriving Generic
135
136 deriveJSON (unPrefix "_scst_") 'ScraperStatus
137 -}
138
139
140
141 ------------------------------------------------------------------------
142 ------------------------------------------------------------------------
143 data WithQuery = WithQuery
144 { _wq_query :: !Text
145 , _wq_databases :: ![ExternalAPIs]
146 }
147 deriving Generic
148
149 makeLenses ''WithQuery
150
151 instance FromJSON WithQuery where
152 parseJSON = genericParseJSON $ jsonOptions "_wq_"
153
154 instance ToSchema WithQuery
155
156 ------------------------------------------------------------------------
157 type
158 AddAPI withInput = AsyncJobsAPI ScraperStatus withInput ScraperStatus
159 ------------------------------------------------------------------------
160
161 type AddWithQuery = Summary "Add to corpus endpoint"
162 :> "corpus"
163 :> Capture "corpus_id" CorpusId
164 :> "add"
165 :> "query"
166 :> "async"
167 :> AddAPI WithQuery
168
169 type AddWithFile = Summary "Add to corpus endpoint"
170 :> "corpus"
171 :> Capture "corpus_id" CorpusId
172 :> "add"
173 :> "file"
174 :> MultipartForm Mem (MultipartData Mem)
175 :> QueryParam "fileType" FileType
176 :> "async"
177 :> AddAPI ()
178
179 ------------------------------------------------------------------------
180 -- TODO WithQuery also has a corpus id
181 addToCorpusJobFunction :: FlowCmdM env err m
182 => CorpusId
183 -> WithQuery
184 -> (ScraperStatus -> m ())
185 -> m ScraperStatus
186 addToCorpusJobFunction _cid _input logStatus = do
187 -- TODO ...
188 logStatus ScraperStatus { _scst_succeeded = Just 10
189 , _scst_failed = Just 2
190 , _scst_remaining = Just 138
191 , _scst_events = Just []
192 }
193 -- TODO ...
194 pure ScraperStatus { _scst_succeeded = Just 137
195 , _scst_failed = Just 13
196 , _scst_remaining = Just 0
197 , _scst_events = Just []
198 }
199
200 addToCorpusWithFile :: FlowCmdM env err m
201 => CorpusId
202 -> MultipartData Mem
203 -> Maybe FileType
204 -> (ScraperStatus -> m ())
205 -> m ScraperStatus
206 addToCorpusWithFile cid input filetype logStatus = do
207 logStatus ScraperStatus { _scst_succeeded = Just 10
208 , _scst_failed = Just 2
209 , _scst_remaining = Just 138
210 , _scst_events = Just []
211 }
212 _h <- postUpload cid filetype input
213
214 pure ScraperStatus { _scst_succeeded = Just 137
215 , _scst_failed = Just 13
216 , _scst_remaining = Just 0
217 , _scst_events = Just []
218 }
219