2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 New corpus means either:
12 - new data in existing corpus
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
24 module Gargantext.API.Corpus.New
28 import Control.Monad.IO.Class (liftIO)
29 import Data.Aeson.TH (deriveJSON)
31 import Servant.Job.Utils (jsonOptions)
32 import Control.Lens hiding (elements)
33 import Servant.Multipart
35 import Data.Text (Text)
36 import GHC.Generics (Generic)
37 import Servant.Job.Types
38 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
39 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
40 import Gargantext.Database.Types.Node (CorpusId)
41 import Gargantext.Text.Terms (TermType(..))
42 import Gargantext.Prelude
43 import Gargantext.API.Orchestrator.Types
45 -- import Servant.Job.Server
46 import Test.QuickCheck (elements)
47 import Test.QuickCheck.Arbitrary
48 import Gargantext.Core (Lang(..))
49 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
50 import qualified Gargantext.Text.Corpus.API as API
51 import Gargantext.Database.Types.Node (UserId)
52 import Gargantext.API.Corpus.New.File
54 data Query = Query { query_query :: Text
55 , query_corpus_id :: Int
56 , query_databases :: [API.ExternalAPIs]
58 deriving (Eq, Show, Generic)
60 deriveJSON (unPrefix "query_") 'Query
63 instance Arbitrary Query where
64 arbitrary = elements [ Query q n fs
67 , fs <- take 3 $ repeat API.externalAPIs
70 instance ToSchema Query where
71 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
73 type Api = Summary "New Corpus endpoint"
74 :> ReqBody '[JSON] Query
75 :> Post '[JSON] CorpusId
76 :<|> Get '[JSON] ApiInfo
78 -- | TODO manage several apis
80 -- TODO this is only the POST
81 api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
82 api _uId (Query q _ as) = do
83 cId <- case head as of
84 Nothing -> flowCorpusSearchInDatabase "user1" EN q
85 Just API.All -> flowCorpusSearchInDatabase "user1" EN q
87 docs <- liftIO $ API.get a q (Just 1000)
88 cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
93 ------------------------------------------------
94 data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
96 instance Arbitrary ApiInfo where
97 arbitrary = ApiInfo <$> arbitrary
99 deriveJSON (unPrefix "") 'ApiInfo
101 instance ToSchema ApiInfo
103 info :: FlowCmdM env err m => UserId -> m ApiInfo
104 info _u = pure $ ApiInfo API.externalAPIs
107 -- Proposal to replace the Query type which seems to generically named.
108 data ScraperInput = ScraperInput
109 { _scin_query :: !Text
110 , _scin_corpus_id :: !Int
111 , _scin_databases :: [API.ExternalAPIs]
113 deriving (Eq, Show, Generic)
115 makeLenses ''ScraperInput
117 deriveJSON (unPrefix "_scin_") 'ScraperInput
119 data ScraperEvent = ScraperEvent
120 { _scev_message :: !(Maybe Text)
121 , _scev_level :: !(Maybe Text)
122 , _scev_date :: !(Maybe Text)
126 deriveJSON (unPrefix "_scev_") 'ScraperEvent
128 data ScraperStatus = ScraperStatus
129 { _scst_succeeded :: !(Maybe Int)
130 , _scst_failed :: !(Maybe Int)
131 , _scst_remaining :: !(Maybe Int)
132 , _scst_events :: !(Maybe [ScraperEvent])
136 deriveJSON (unPrefix "_scst_") 'ScraperStatus
141 ------------------------------------------------------------------------
142 ------------------------------------------------------------------------
143 data WithQuery = WithQuery
145 , _wq_databases :: ![ExternalAPIs]
149 makeLenses ''WithQuery
151 instance FromJSON WithQuery where
152 parseJSON = genericParseJSON $ jsonOptions "_wq_"
154 instance ToSchema WithQuery
156 ------------------------------------------------------------------------
158 AddAPI withInput = AsyncJobsAPI ScraperStatus withInput ScraperStatus
159 ------------------------------------------------------------------------
161 type AddWithQuery = Summary "Add to corpus endpoint"
163 :> Capture "corpus_id" CorpusId
169 type AddWithFile = Summary "Add to corpus endpoint"
171 :> Capture "corpus_id" CorpusId
174 :> MultipartForm Mem (MultipartData Mem)
175 :> QueryParam "fileType" FileType
179 ------------------------------------------------------------------------
180 -- TODO WithQuery also has a corpus id
181 addToCorpusJobFunction :: FlowCmdM env err m
184 -> (ScraperStatus -> m ())
186 addToCorpusJobFunction _cid _input logStatus = do
188 logStatus ScraperStatus { _scst_succeeded = Just 10
189 , _scst_failed = Just 2
190 , _scst_remaining = Just 138
191 , _scst_events = Just []
194 pure ScraperStatus { _scst_succeeded = Just 137
195 , _scst_failed = Just 13
196 , _scst_remaining = Just 0
197 , _scst_events = Just []
200 addToCorpusWithFile :: FlowCmdM env err m
204 -> (ScraperStatus -> m ())
206 addToCorpusWithFile cid input filetype logStatus = do
207 logStatus ScraperStatus { _scst_succeeded = Just 10
208 , _scst_failed = Just 2
209 , _scst_remaining = Just 138
210 , _scst_events = Just []
212 _h <- postUpload cid filetype input
214 pure ScraperStatus { _scst_succeeded = Just 137
215 , _scst_failed = Just 13
216 , _scst_remaining = Just 0
217 , _scst_events = Just []