2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 New corpus means either:
12 - new data in existing corpus
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
24 module Gargantext.API.Corpus.New
28 import Control.Monad.IO.Class (liftIO)
29 import Data.Aeson.TH (deriveJSON)
31 import Data.Text (Text)
32 import GHC.Generics (Generic)
33 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
34 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
35 import Gargantext.Database.Types.Node (CorpusId)
36 import Gargantext.Text.Terms (TermType(..))
37 import Gargantext.Prelude
38 import Gargantext.API.Orchestrator.Types
40 -- import Servant.Job.Server
41 import Test.QuickCheck (elements)
42 import Test.QuickCheck.Arbitrary
43 import Gargantext.Core (Lang(..))
44 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
45 import qualified Gargantext.Text.Corpus.API as API
46 import Gargantext.Database.Types.Node (UserId)
48 data Query = Query { query_query :: Text
49 , query_corpus_id :: Int
50 , query_databases :: [API.ExternalAPIs]
52 deriving (Eq, Show, Generic)
54 deriveJSON (unPrefix "query_") 'Query
57 instance Arbitrary Query where
58 arbitrary = elements [ Query q n fs
61 , fs <- take 3 $ repeat API.externalAPIs
64 instance ToSchema Query where
65 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
67 type Api = Summary "New Corpus endpoint"
68 :> ReqBody '[JSON] Query
69 :> Post '[JSON] CorpusId
70 :<|> Get '[JSON] ApiInfo
72 -- | TODO manage several apis
74 -- TODO this is only the POST
75 api :: (FlowCmdM env err m) => Query -> m CorpusId
76 api (Query q _ as) = do
77 cId <- case head as of
78 Nothing -> flowCorpusSearchInDatabase "user1" EN q
79 Just API.All -> flowCorpusSearchInDatabase "user1" EN q
81 docs <- liftIO $ API.get a q (Just 1000)
82 cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
87 ------------------------------------------------
88 data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
90 instance Arbitrary ApiInfo where
91 arbitrary = ApiInfo <$> arbitrary
93 deriveJSON (unPrefix "") 'ApiInfo
95 instance ToSchema ApiInfo
97 info :: FlowCmdM env err m => UserId -> m ApiInfo
98 info _u = pure $ ApiInfo API.externalAPIs
101 -- Proposal to replace the Query type which seems to generically named.
102 data ScraperInput = ScraperInput
103 { _scin_query :: !Text
104 , _scin_corpus_id :: !Int
105 , _scin_databases :: [API.ExternalAPIs]
107 deriving (Eq, Show, Generic)
109 makeLenses ''ScraperInput
111 deriveJSON (unPrefix "_scin_") 'ScraperInput
113 data ScraperEvent = ScraperEvent
114 { _scev_message :: !(Maybe Text)
115 , _scev_level :: !(Maybe Text)
116 , _scev_date :: !(Maybe Text)
120 deriveJSON (unPrefix "_scev_") 'ScraperEvent
122 data ScraperStatus = ScraperStatus
123 { _scst_succeeded :: !(Maybe Int)
124 , _scst_failed :: !(Maybe Int)
125 , _scst_remaining :: !(Maybe Int)
126 , _scst_events :: !(Maybe [ScraperEvent])
130 deriveJSON (unPrefix "_scst_") 'ScraperStatus
134 Summary "Add to corpus endpoint" :>
136 Capture "id" CorpusId :>
138 "async" :> ScraperAPI2
140 -- TODO ScraperInput2 also has a corpus id
141 addToCorpusJobFunction :: FlowCmdM env err m => CorpusId -> ScraperInput2 -> (ScraperStatus -> m ()) -> m ScraperStatus
142 addToCorpusJobFunction _cid _input logStatus = do
144 logStatus ScraperStatus { _scst_succeeded = Just 10
145 , _scst_failed = Just 2
146 , _scst_remaining = Just 138
147 , _scst_events = Just []
150 pure ScraperStatus { _scst_succeeded = Just 137
151 , _scst_failed = Just 13
152 , _scst_remaining = Just 0
153 , _scst_events = Just []