]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams/List.hs
[list] upload CSV endpoint works, but 400 error still thrown
[gargantext.git] / src / Gargantext / API / Ngrams / List.hs
1 {-|
2 Module : Gargantext.API.Ngrams.List
3 Description : Get Ngrams (lists)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE TemplateHaskell #-}
13 {-# LANGUAGE TypeOperators #-}
14
15 module Gargantext.API.Ngrams.List
16 where
17
18 import Control.Lens hiding (elements, Indexed)
19 import Data.Aeson
20 import Data.HashMap.Strict (HashMap)
21 import Data.Map (toList, fromList)
22 import Data.Maybe (catMaybes)
23 import Data.Set (Set)
24 import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
25 import Data.Text (Text, concat, pack)
26 import GHC.Generics (Generic)
27 import Network.HTTP.Media ((//), (/:))
28 import Servant
29 import Servant.Job.Async
30 import Servant.Job.Utils (jsonOptions)
31 import Web.FormUrlEncoded (FromForm)
32 import qualified Data.HashMap.Strict as HashMap
33 import qualified Data.List as List
34 import qualified Data.Map as Map
35 import qualified Data.Text as Text
36
37 import Gargantext.API.Admin.Orchestrator.Types
38 import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams)
39 import Gargantext.API.Ngrams.Tools (getTermsWith)
40 import Gargantext.API.Ngrams.Types
41 import Gargantext.API.Node.Corpus.New.File (FileType(..))
42 import Gargantext.API.Prelude (GargServer)
43 import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
44 import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
45 import Gargantext.Core.Types.Main (ListType(..))
46 import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
47 import Gargantext.Database.Action.Flow (saveDocNgramsWith)
48 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
49 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
50 import Gargantext.Database.Admin.Types.Hyperdata.Document
51 import Gargantext.Database.Admin.Types.Node
52 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
53 import Gargantext.Database.Schema.Ngrams
54 import Gargantext.Database.Schema.Node
55 import Gargantext.Database.Types (Indexed(..))
56 import Gargantext.Prelude
57
58 ------------------------------------------------------------------------
59 type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
60 -- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
61 :<|> PostAPI
62 :<|> CSVPostAPI
63
64 api :: ListId -> GargServer API
65 api l = get l :<|> postAsync l :<|> csvPostAsync l
66
67 data HTML
68 instance Accept HTML where
69 contentType _ = "text" // "html" /: ("charset", "utf-8")
70 instance ToJSON a => MimeRender HTML a where
71 mimeRender _ = encode
72
73 ------------------------------------------------------------------------
74 get :: RepoCmdM env err m =>
75 ListId -> m (Headers '[Header "Content-Disposition" Text] NgramsList)
76 get lId = do
77 lst <- get' lId
78 let (NodeId id') = lId
79 return $ addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
80 , pack $ show id'
81 , ".json"
82 ]
83 ) lst
84
85 get' :: RepoCmdM env err m
86 => ListId -> m NgramsList
87 get' lId = fromList
88 <$> zip ngramsTypes
89 <$> mapM (getNgramsTableMap lId) ngramsTypes
90
91 ------------------------------------------------------------------------
92 -- TODO : purge list
93 -- TODO talk
94 post :: FlowCmdM env err m
95 => ListId
96 -> NgramsList
97 -> m Bool
98 post l m = do
99 -- TODO check with Version for optim
100 _ <- mapM (\(nt, Versioned _v ns) -> setListNgrams l nt ns) $ toList m
101 -- TODO reindex
102 pure True
103
104 ------------------------------------------------------------------------
105 csvPost :: FlowCmdM env err m
106 => ListId
107 -> NgramsList
108 -> m Bool
109 csvPost l m = do
110 printDebug "[csvPost] l" l
111 printDebug "[csvPost] m" m
112 pure True
113
114 -----------------------------------------------------------------------------
115 -- | Re-index documents of a corpus with new ngrams (called orphans here)
116 reIndexWith :: ( HasRepo env
117 , FlowCmdM env err m
118 )
119 => CorpusId
120 -> ListId
121 -> NgramsType
122 -> Set ListType
123 -> m ()
124 reIndexWith cId lId nt lts = do
125 -- Getting [NgramsTerm]
126 ts <- List.concat
127 <$> map (\(k,vs) -> k:vs)
128 <$> HashMap.toList
129 <$> getTermsWith identity [lId] nt lts
130
131 -- printDebug "ts" ts
132
133 -- Taking the ngrams with 0 occurrences only (orphans)
134 occs <- getOccByNgramsOnlyFast' cId lId nt ts
135
136 -- printDebug "occs" occs
137
138 let orphans = List.concat
139 $ map (\t -> case HashMap.lookup t occs of
140 Nothing -> [t]
141 Just n -> if n <= 1 then [t] else [ ]
142 ) ts
143
144 -- printDebug "orphans" orphans
145
146 -- Get all documents of the corpus
147 docs <- selectDocNodes cId
148 -- printDebug "docs length" (List.length docs)
149
150 -- Checking Text documents where orphans match
151 -- TODO Tests here
152 let
153 ngramsByDoc = map (HashMap.fromList)
154 $ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v)))
155 $ map (\doc -> List.zip
156 (termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
157 $ Text.unlines $ catMaybes
158 [ doc ^. node_hyperdata . hd_title
159 , doc ^. node_hyperdata . hd_abstract
160 ]
161 )
162 (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. node_id) 1 )]])
163 ) docs
164
165 -- printDebug "ngramsByDoc" ngramsByDoc
166
167 -- Saving the indexation in database
168 _ <- mapM (saveDocNgramsWith lId) ngramsByDoc
169
170 pure () -- ngramsByDoc
171
172 toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
173 toIndexedNgrams m t = Indexed <$> i <*> n
174 where
175 i = HashMap.lookup t m
176 n = Just (text2ngrams t)
177
178 ------------------------------------------------------------------------
179 type PostAPI = Summary "Update List"
180 :> "add"
181 :> "form"
182 :> "async"
183 :> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
184
185 postAsync :: ListId -> GargServer PostAPI
186 postAsync lId =
187 serveJobsAPI $
188 JobFunction (\f log' -> postAsync' lId f (liftBase . log'))
189
190 postAsync' :: FlowCmdM env err m
191 => ListId
192 -> WithFile
193 -> (JobLog -> m ())
194 -> m JobLog
195 postAsync' l (WithFile _ m _) logStatus = do
196
197 logStatus JobLog { _scst_succeeded = Just 0
198 , _scst_failed = Just 0
199 , _scst_remaining = Just 1
200 , _scst_events = Just []
201 }
202 _r <- post l m
203
204 pure JobLog { _scst_succeeded = Just 1
205 , _scst_failed = Just 0
206 , _scst_remaining = Just 0
207 , _scst_events = Just []
208 }
209 ------------------------------------------------------------------------
210 type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
211 :> "csv"
212 :> "add"
213 :> "form"
214 :> "async"
215 :> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
216
217 csvPostAsync :: ListId -> GargServer PostAPI
218 csvPostAsync lId =
219 serveJobsAPI $
220 JobFunction $ \f@(WithFile ft _ n) log' -> do
221 printDebug "[csvPostAsync] filetype" ft
222 printDebug "[csvPostAsync] name" n
223 csvPostAsync' lId f (liftBase . log')
224
225 csvPostAsync' :: FlowCmdM env err m
226 => ListId
227 -> WithFile
228 -> (JobLog -> m ())
229 -> m JobLog
230 csvPostAsync' l (WithFile _ m _) logStatus = do
231 logStatus JobLog { _scst_succeeded = Just 0
232 , _scst_failed = Just 0
233 , _scst_remaining = Just 1
234 , _scst_events = Just []
235 }
236 _r <- csvPost l m
237
238 pure JobLog { _scst_succeeded = Just 1
239 , _scst_failed = Just 0
240 , _scst_remaining = Just 0
241 , _scst_events = Just []
242 }
243 ------------------------------------------------------------------------
244
245 data WithFile = WithFile
246 { _wf_filetype :: !FileType
247 , _wf_data :: !NgramsList
248 , _wf_name :: !Text
249 } deriving (Eq, Show, Generic)
250
251 makeLenses ''WithFile
252 instance FromForm WithFile
253 instance FromJSON WithFile where
254 parseJSON = genericParseJSON $ jsonOptions "_wf_"
255 instance ToJSON WithFile where
256 toJSON = genericToJSON $ jsonOptions "_wf_"
257 instance ToSchema WithFile where
258 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")