]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Corpus/New.hs
[SCRAPER] setup the new version of addToCorpus with servant-job
[gargantext.git] / src / Gargantext / API / Corpus / New.hs
1 {-|
2 Module : Gargantext.API.Corpus.New
3 Description : New corpus API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 New corpus means either:
11 - new corpus
12 - new data in existing corpus
13 -}
14
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE DataKinds #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE FlexibleContexts #-}
22 {-# LANGUAGE RankNTypes #-}
23
24 module Gargantext.API.Corpus.New
25 where
26
27 import Data.Either
28 import Control.Monad.IO.Class (liftIO)
29 import Data.Aeson.TH (deriveJSON)
30 import Data.Swagger
31 import Data.Text (Text)
32 import GHC.Generics (Generic)
33 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
34 import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
35 import Gargantext.Database.Types.Node (CorpusId)
36 import Gargantext.Text.Terms (TermType(..))
37 import Gargantext.Prelude
38 import Gargantext.API.Orchestrator.Types
39 import Servant
40 -- import Servant.Job.Server
41 import Test.QuickCheck (elements)
42 import Test.QuickCheck.Arbitrary
43 import Gargantext.Core (Lang(..))
44 import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
45 import qualified Gargantext.Text.Corpus.API as API
46 import Gargantext.Database.Types.Node (UserId)
47
48 data Query = Query { query_query :: Text
49 , query_corpus_id :: Int
50 , query_databases :: [API.ExternalAPIs]
51 }
52 deriving (Eq, Show, Generic)
53
54 deriveJSON (unPrefix "query_") 'Query
55
56
57 instance Arbitrary Query where
58 arbitrary = elements [ Query q n fs
59 | q <- ["a","b"]
60 , n <- [0..10]
61 , fs <- take 3 $ repeat API.externalAPIs
62 ]
63
64 instance ToSchema Query where
65 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
66
67 type Api = Summary "New Corpus endpoint"
68 :> ReqBody '[JSON] Query
69 :> Post '[JSON] CorpusId
70 :<|> Get '[JSON] ApiInfo
71
72 -- | TODO manage several apis
73 -- TODO-ACCESS
74 -- TODO this is only the POST
75 api :: (FlowCmdM env err m) => Query -> m CorpusId
76 api (Query q _ as) = do
77 cId <- case head as of
78 Nothing -> flowCorpusSearchInDatabase "user1" EN q
79 Just API.All -> flowCorpusSearchInDatabase "user1" EN q
80 Just a -> do
81 docs <- liftIO $ API.get a q (Just 1000)
82 cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
83 pure cId'
84
85 pure cId
86
87 ------------------------------------------------
88 data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
89 deriving (Generic)
90 instance Arbitrary ApiInfo where
91 arbitrary = ApiInfo <$> arbitrary
92
93 deriveJSON (unPrefix "") 'ApiInfo
94
95 instance ToSchema ApiInfo
96
97 info :: FlowCmdM env err m => UserId -> m ApiInfo
98 info _u = pure $ ApiInfo API.externalAPIs
99
100 {-
101 -- Proposal to replace the Query type which seems to generically named.
102 data ScraperInput = ScraperInput
103 { _scin_query :: !Text
104 , _scin_corpus_id :: !Int
105 , _scin_databases :: [API.ExternalAPIs]
106 }
107 deriving (Eq, Show, Generic)
108
109 makeLenses ''ScraperInput
110
111 deriveJSON (unPrefix "_scin_") 'ScraperInput
112
113 data ScraperEvent = ScraperEvent
114 { _scev_message :: !(Maybe Text)
115 , _scev_level :: !(Maybe Text)
116 , _scev_date :: !(Maybe Text)
117 }
118 deriving Generic
119
120 deriveJSON (unPrefix "_scev_") 'ScraperEvent
121
122 data ScraperStatus = ScraperStatus
123 { _scst_succeeded :: !(Maybe Int)
124 , _scst_failed :: !(Maybe Int)
125 , _scst_remaining :: !(Maybe Int)
126 , _scst_events :: !(Maybe [ScraperEvent])
127 }
128 deriving Generic
129
130 deriveJSON (unPrefix "_scst_") 'ScraperStatus
131 -}
132
133 type API_v2 =
134 Summary "Add to corpus endpoint" :>
135 "corpus" :>
136 Capture "id" CorpusId :>
137 "add" :>
138 "async" :> ScraperAPI2
139
140 -- TODO ScraperInput2 also has a corpus id
141 addToCorpusJobFunction :: FlowCmdM env err m => CorpusId -> ScraperInput2 -> (ScraperStatus -> m ()) -> m ScraperStatus
142 addToCorpusJobFunction _cid _input logStatus = do
143 -- TODO ...
144 logStatus ScraperStatus { _scst_succeeded = Just 10
145 , _scst_failed = Just 2
146 , _scst_remaining = Just 138
147 , _scst_events = Just []
148 }
149 -- TODO ...
150 pure ScraperStatus { _scst_succeeded = Just 137
151 , _scst_failed = Just 13
152 , _scst_remaining = Just 0
153 , _scst_events = Just []
154 }