]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[ngrams] async endpoint for ngrams put
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds -fno-warn-name-shadowing #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsStatePatch
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepo
56 , NgramsRepoElement(..)
57 , saveRepo
58 , initRepo
59
60 , RepoEnv(..)
61 , renv_var
62 , renv_lock
63
64 , TabType(..)
65
66 , HasRepoVar(..)
67 , HasRepoSaver(..)
68 , HasRepo(..)
69 , RepoCmdM
70 , QueryParamR
71 , TODO
72
73 -- Internals
74 , getNgramsTableMap
75 , dumpJsonTableMap
76 , tableNgramsPull
77 , tableNgramsPut
78
79 , Version
80 , Versioned(..)
81 , currentVersion
82 , listNgramsChangedSince
83 )
84 where
85
86 import Control.Concurrent
87 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped)
88 import Control.Monad.Reader
89 import Data.Aeson hiding ((.=))
90 import qualified Data.Aeson.Text as DAT
91 import Data.Either (Either(..))
92 import Data.Foldable
93 import qualified Data.List as List
94 import Data.Map.Strict (Map)
95 import qualified Data.Map.Strict as Map
96 import qualified Data.Map.Strict.Patch as PM
97 import Data.Maybe (fromMaybe)
98 import Data.Monoid
99 import Data.Ord (Down(..))
100 import Data.Patch.Class (Action(act), Transformable(..), ours)
101 import qualified Data.Set as S
102 import qualified Data.Set as Set
103 import Data.Swagger hiding (version, patch)
104 import Data.Text (Text, isInfixOf, unpack)
105 import Data.Text.Lazy.IO as DTL
106 import Formatting (hprint, int, (%))
107 import Formatting.Clock (timeSpecs)
108 import GHC.Generics (Generic)
109 import Servant hiding (Patch)
110 import System.Clock (getTime, TimeSpec, Clock(..))
111 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
112 import System.IO (stderr)
113 import Test.QuickCheck (elements)
114 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
115
116 import Prelude (error)
117 import Gargantext.Prelude
118
119 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
120 import Gargantext.API.Admin.Types (HasSettings)
121 import qualified Gargantext.API.Metrics as Metrics
122 import Gargantext.API.Ngrams.Types
123 import Gargantext.API.Prelude
124 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
125 import Gargantext.Core.Utils (something)
126 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
127 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
128 import Gargantext.Database.Action.Flow.Types
129 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
130 import Gargantext.Database.Admin.Config (userMaster)
131 import Gargantext.Database.Admin.Types.Node (NodeType(..))
132 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
133 import Gargantext.Database.Query.Table.Node.Select
134 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
135 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
136 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
137 import Gargantext.Database.Query.Table.Node (getNode)
138 import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
139 import Gargantext.Prelude.Job
140
141 {-
142 -- TODO sequences of modifications (Patchs)
143 type NgramsIdPatch = Patch NgramsId NgramsPatch
144
145 ngramsPatch :: Int -> NgramsPatch
146 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
147
148 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
149 toEdit n p = Edit n p
150 ngramsIdPatch :: Patch NgramsId NgramsPatch
151 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
152 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
153 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
154 ]
155
156 -- applyPatchBack :: Patch -> IO Patch
157 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
158 -}
159 ------------------------------------------------------------------------
160 ------------------------------------------------------------------------
161 ------------------------------------------------------------------------
162
163 {-
164 -- TODO: Replace.old is ignored which means that if the current list
165 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
166 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
167 -- However this should not happen in non conflicting situations.
168 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
169 mkListsUpdate nt patches =
170 [ (ngramsTypeId nt, ng, listTypeId lt)
171 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
172 , lt <- patch ^.. patch_list . new
173 ]
174
175 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
176 -> NgramsType
177 -> NgramsTablePatch
178 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
179 mkChildrenGroups addOrRem nt patches =
180 [ (ngramsTypeId nt, parent, child)
181 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
182 , child <- patch ^.. patch_children . to addOrRem . folded
183 ]
184 -}
185
186 ------------------------------------------------------------------------
187
188 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
189 => m ()
190 saveRepo = liftBase =<< view repoSaver
191
192 listTypeConflictResolution :: ListType -> ListType -> ListType
193 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
194
195 ngramsStatePatchConflictResolution
196 :: TableNgrams.NgramsType
197 -> NodeId
198 -> NgramsTerm
199 -> ConflictResolutionNgramsPatch
200 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
201 = (ours, (const ours, ours), (False, False))
202 -- (False, False) mean here that Mod has always priority.
203 -- (True, False) <- would mean priority to the left (same as ours).
204
205 -- undefined {- TODO think this through -}, listTypeConflictResolution)
206
207 -- Current state:
208 -- Insertions are not considered as patches,
209 -- they do not extend history,
210 -- they do not bump version.
211 insertNewOnly :: a -> Maybe b -> a
212 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
213 -- TODO error handling
214
215 {- unused
216 -- TODO refactor with putListNgrams
217 copyListNgrams :: RepoCmdM env err m
218 => NodeId -> NodeId -> NgramsType
219 -> m ()
220 copyListNgrams srcListId dstListId ngramsType = do
221 var <- view repoVar
222 liftBase $ modifyMVar_ var $
223 pure . (r_state . at ngramsType %~ (Just . f . something))
224 saveRepo
225 where
226 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
227 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
228
229 -- TODO refactor with putListNgrams
230 -- The list must be non-empty!
231 -- The added ngrams must be non-existent!
232 addListNgrams :: RepoCmdM env err m
233 => NodeId -> NgramsType
234 -> [NgramsElement] -> m ()
235 addListNgrams listId ngramsType nes = do
236 var <- view repoVar
237 liftBase $ modifyMVar_ var $
238 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
239 saveRepo
240 where
241 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
242 -}
243
244 -- UNSAFE
245 rmListNgrams :: RepoCmdM env err m
246 => ListId
247 -> TableNgrams.NgramsType
248 -> m ()
249 rmListNgrams l nt = setListNgrams l nt mempty
250
251 -- | TODO: incr the Version number
252 -- && should use patch
253 -- UNSAFE
254 setListNgrams :: RepoCmdM env err m
255 => NodeId
256 -> TableNgrams.NgramsType
257 -> Map NgramsTerm NgramsRepoElement
258 -> m ()
259 setListNgrams listId ngramsType ns = do
260 var <- view repoVar
261 liftBase $ modifyMVar_ var $
262 pure . ( r_state
263 . at ngramsType %~
264 (Just .
265 (at listId .~ ( Just ns))
266 . something
267 )
268 )
269 saveRepo
270
271
272 currentVersion :: RepoCmdM env err m
273 => m Version
274 currentVersion = do
275 var <- view repoVar
276 r <- liftBase $ readMVar var
277 pure $ r ^. r_version
278
279
280 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
281 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
282 commitStatePatch (Versioned p_version p) = do
283 var <- view repoVar
284 vq' <- liftBase $ modifyMVar var $ \r -> do
285 let
286 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
287 (p', q') = transformWith ngramsStatePatchConflictResolution p q
288 r' = r & r_version +~ 1
289 & r_state %~ act p'
290 & r_history %~ (p' :)
291 {-
292 -- Ideally we would like to check these properties. However:
293 -- * They should be checked only to debug the code. The client data
294 -- should be able to trigger these.
295 -- * What kind of error should they throw (we are in IO here)?
296 -- * Should we keep modifyMVar?
297 -- * Should we throw the validation in an Exception, catch it around
298 -- modifyMVar and throw it back as an Error?
299 assertValid $ transformable p q
300 assertValid $ applicable p' (r ^. r_state)
301 -}
302 pure (r', Versioned (r' ^. r_version) q')
303
304 saveRepo
305 pure vq'
306
307 -- This is a special case of tableNgramsPut where the input patch is empty.
308 tableNgramsPull :: RepoCmdM env err m
309 => ListId
310 -> TableNgrams.NgramsType
311 -> Version
312 -> m (Versioned NgramsTablePatch)
313 tableNgramsPull listId ngramsType p_version = do
314 var <- view repoVar
315 r <- liftBase $ readMVar var
316
317 let
318 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
319 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
320
321 pure (Versioned (r ^. r_version) q_table)
322
323 -- Apply the given patch to the DB and returns the patch to be applied on the
324 -- client.
325 -- TODO-ACCESS check
326 tableNgramsPut :: ( FlowCmdM env err m
327 , HasSettings env
328 )
329 => TabType
330 -> ListId
331 -> Versioned NgramsTablePatch
332 -> m (Versioned NgramsTablePatch)
333 tableNgramsPut tabType listId (Versioned p_version p_table)
334 | p_table == mempty = do
335 let ngramsType = ngramsTypeFromTabType tabType
336 tableNgramsPull listId ngramsType p_version
337
338 | otherwise = do
339 let ngramsType = ngramsTypeFromTabType tabType
340 (p0, p0_validity) = PM.singleton listId p_table
341 (p, p_validity) = PM.singleton ngramsType p0
342
343 assertValid p0_validity
344 assertValid p_validity
345
346 ret <- commitStatePatch (Versioned p_version p)
347 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
348
349 pure ret
350
351
352 tableNgramsPutAsync :: ( FlowCmdM env err m
353 , HasSettings env
354 )
355 => UpdateTableNgrams
356 -> (JobLog -> m ())
357 -> m JobLog
358 tableNgramsPutAsync utn logStatus = do
359 -- let (Versioned p_version p_table) = utn ^. utn_patch
360 let tabType = utn ^. utn_tab_type
361 let listId = utn ^. utn_list_id
362
363 node <- getNode listId
364 let nId = node ^. node_id
365 _uId = node ^. node_userId
366 mCId = node ^. node_parentId
367
368 printDebug "[tableNgramsPut] tabType" tabType
369 printDebug "[tableNgramsPut] listId" listId
370
371 case mCId of
372 Nothing -> do
373 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
374 pure $ jobLogFail $ jobLogInit 1
375 Just cId -> do
376 case tabType of
377 Authors -> do
378 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
379 let jl = jobLogInit 1
380 logStatus jl
381 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
382 pure $ jobLogSuccess jl
383 Institutes -> do
384 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
385 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
386 let jl = jobLogInit 3
387 logStatus jl
388 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
389 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
390 let jl = jobLogSuccess jl
391 logStatus jl
392 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
393 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
394 let jl = jobLogSuccess jl
395 logStatus jl
396 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
397 pure $ jobLogSuccess jl
398 Sources -> do
399 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
400 let jl = jobLogInit 1
401 logStatus jl
402 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
403 pure $ jobLogSuccess jl
404 Terms -> do
405 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
406 let jl = jobLogInit 6
407 logStatus jl
408 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
409 let jl = jobLogSuccess jl
410 logStatus jl
411 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
412 let jl = jobLogSuccess jl
413 logStatus jl
414 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
415 let jl = jobLogSuccess jl
416 logStatus jl
417 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
418 let jl = jobLogSuccess jl
419 logStatus jl
420 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
421 let jl = jobLogSuccess jl
422 logStatus jl
423 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
424 pure $ jobLogSuccess jl
425 _ -> do
426 printDebug "[tableNgramsPut] no update for tabType = " tabType
427 pure $ jobLogFail $ jobLogInit 1
428
429 {-
430 { _ne_list :: ListType
431 If we merge the parents/children we can potentially create cycles!
432 , _ne_parent :: Maybe NgramsTerm
433 , _ne_children :: MSet NgramsTerm
434 }
435 -}
436
437 getNgramsTableMap :: RepoCmdM env err m
438 => NodeId
439 -> TableNgrams.NgramsType
440 -> m (Versioned NgramsTableMap)
441 getNgramsTableMap nodeId ngramsType = do
442 v <- view repoVar
443 repo <- liftBase $ readMVar v
444 pure $ Versioned (repo ^. r_version)
445 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
446
447 dumpJsonTableMap :: RepoCmdM env err m
448 => Text
449 -> NodeId
450 -> TableNgrams.NgramsType
451 -> m ()
452 dumpJsonTableMap fpath nodeId ngramsType = do
453 m <- getNgramsTableMap nodeId ngramsType
454 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
455 pure ()
456
457 type MinSize = Int
458 type MaxSize = Int
459
460 -- | TODO Errors management
461 -- TODO: polymorphic for Annuaire or Corpus or ...
462 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
463 -- TODO: should take only one ListId
464
465 getTime' :: MonadBase IO m => m TimeSpec
466 getTime' = liftBase $ getTime ProcessCPUTime
467
468
469 getTableNgrams :: forall env err m.
470 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
471 => NodeType -> NodeId -> TabType
472 -> ListId -> Limit -> Maybe Offset
473 -> Maybe ListType
474 -> Maybe MinSize -> Maybe MaxSize
475 -> Maybe OrderBy
476 -> (NgramsTerm -> Bool)
477 -> m (Versioned NgramsTable)
478 getTableNgrams _nType nId tabType listId limit_ offset
479 listType minSize maxSize orderBy searchQuery = do
480
481 t0 <- getTime'
482 -- lIds <- selectNodesWithUsername NodeList userMaster
483 let
484 ngramsType = ngramsTypeFromTabType tabType
485 offset' = maybe 0 identity offset
486 listType' = maybe (const True) (==) listType
487 minSize' = maybe (const True) (<=) minSize
488 maxSize' = maybe (const True) (>=) maxSize
489
490 selected_node n = minSize' s
491 && maxSize' s
492 && searchQuery (n ^. ne_ngrams)
493 && listType' (n ^. ne_list)
494 where
495 s = n ^. ne_size
496
497 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
498
499 ---------------------------------------
500 sortOnOrder Nothing = identity
501 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
502 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
503 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
504 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
505
506 ---------------------------------------
507 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
508 selectAndPaginate tableMap = roots <> inners
509 where
510 list = tableMap ^.. each
511 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
512 (ne ^. ne_root)
513 selected_nodes = list & take limit_
514 . drop offset'
515 . filter selected_node
516 . sortOnOrder orderBy
517 roots = rootOf <$> selected_nodes
518 rootsSet = Set.fromList (_ne_ngrams <$> roots)
519 inners = list & filter (selected_inner rootsSet)
520
521 ---------------------------------------
522 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
523 setScores False table = pure table
524 setScores True table = do
525 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
526 t1 <- getTime'
527 occurrences <- getOccByNgramsOnlyFast' nId
528 listId
529 ngramsType
530 ngrams_terms
531 t2 <- getTime'
532 liftBase $ hprint stderr
533 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
534 (length ngrams_terms) t1 t2
535 {-
536 occurrences <- getOccByNgramsOnlySlow nType nId
537 (lIds <> [listId])
538 ngramsType
539 ngrams_terms
540 -}
541 let
542 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
543
544 pure $ table & each %~ setOcc
545 ---------------------------------------
546
547 -- lists <- catMaybes <$> listsWith userMaster
548 -- trace (show lists) $
549 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
550
551 let scoresNeeded = needsScores orderBy
552 tableMap1 <- getNgramsTableMap listId ngramsType
553 t1 <- getTime'
554 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
555 . Map.mapWithKey ngramsElementFromRepo
556 t2 <- getTime'
557 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
558 . setScores (not scoresNeeded)
559 . selectAndPaginate
560 t3 <- getTime'
561 liftBase $ hprint stderr
562 ("getTableNgrams total=" % timeSpecs
563 % " map1=" % timeSpecs
564 % " map2=" % timeSpecs
565 % " map3=" % timeSpecs
566 % " sql=" % (if scoresNeeded then "map2" else "map3")
567 % "\n"
568 ) t0 t3 t0 t1 t1 t2 t2 t3
569 pure tableMap3
570
571
572 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
573 scoresRecomputeTableNgrams nId tabType listId = do
574 tableMap <- getNgramsTableMap listId ngramsType
575 _ <- tableMap & v_data %%~ setScores
576 . Map.mapWithKey ngramsElementFromRepo
577
578 pure $ 1
579 where
580 ngramsType = ngramsTypeFromTabType tabType
581
582 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
583 setScores table = do
584 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
585 occurrences <- getOccByNgramsOnlyFast' nId
586 listId
587 ngramsType
588 ngrams_terms
589 let
590 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
591
592 pure $ table & each %~ setOcc
593
594
595
596 -- APIs
597
598 -- TODO: find a better place for the code above, All APIs stay here
599
600 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
601 deriving (Generic, Enum, Bounded, Read, Show)
602
603 instance FromHttpApiData OrderBy
604 where
605 parseUrlPiece "TermAsc" = pure TermAsc
606 parseUrlPiece "TermDesc" = pure TermDesc
607 parseUrlPiece "ScoreAsc" = pure ScoreAsc
608 parseUrlPiece "ScoreDesc" = pure ScoreDesc
609 parseUrlPiece _ = Left "Unexpected value of OrderBy"
610
611
612 instance ToParamSchema OrderBy
613 instance FromJSON OrderBy
614 instance ToJSON OrderBy
615 instance ToSchema OrderBy
616 instance Arbitrary OrderBy
617 where
618 arbitrary = elements [minBound..maxBound]
619
620 needsScores :: Maybe OrderBy -> Bool
621 needsScores (Just ScoreAsc) = True
622 needsScores (Just ScoreDesc) = True
623 needsScores _ = False
624
625 type TableNgramsApiGet = Summary " Table Ngrams API Get"
626 :> QueryParamR "ngramsType" TabType
627 :> QueryParamR "list" ListId
628 :> QueryParamR "limit" Limit
629 :> QueryParam "offset" Offset
630 :> QueryParam "listType" ListType
631 :> QueryParam "minTermSize" MinSize
632 :> QueryParam "maxTermSize" MaxSize
633 :> QueryParam "orderBy" OrderBy
634 :> QueryParam "search" Text
635 :> Get '[JSON] (Versioned NgramsTable)
636
637 type TableNgramsApiPut = Summary " Table Ngrams API Change"
638 :> QueryParamR "ngramsType" TabType
639 :> QueryParamR "list" ListId
640 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
641 :> Put '[JSON] (Versioned NgramsTablePatch)
642
643 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
644 :> QueryParamR "ngramsType" TabType
645 :> QueryParamR "list" ListId
646 :> "recompute" :> Post '[JSON] Int
647
648 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
649 :> QueryParamR "ngramsType" TabType
650 :> QueryParamR "list" ListId
651 :> Get '[JSON] Version
652
653 type TableNgramsApi = TableNgramsApiGet
654 :<|> TableNgramsApiPut
655 :<|> RecomputeScoresNgramsApiGet
656 :<|> "version" :> TableNgramsApiGetVersion
657 :<|> TableNgramsAsyncApi
658
659 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
660 :> "async"
661 :> "update"
662 :> AsyncJobs JobLog '[JSON] UpdateTableNgrams JobLog
663
664 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
665 => NodeId
666 -> TabType
667 -> ListId
668 -> Limit
669 -> Maybe Offset
670 -> Maybe ListType
671 -> Maybe MinSize -> Maybe MaxSize
672 -> Maybe OrderBy
673 -> Maybe Text -- full text search
674 -> m (Versioned NgramsTable)
675 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
676 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
677 where
678 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
679
680 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
681 => NodeId
682 -> TabType
683 -> ListId
684 -> m Version
685 getTableNgramsVersion _nId _tabType _listId = currentVersion
686 -- TODO: limit?
687 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
688 -- This line above looks like a waste of computation to finally get only the version.
689 -- See the comment about listNgramsChangedSince.
690
691
692 -- | Text search is deactivated for now for ngrams by doc only
693 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
694 => DocId -> TabType
695 -> ListId -> Limit -> Maybe Offset
696 -> Maybe ListType
697 -> Maybe MinSize -> Maybe MaxSize
698 -> Maybe OrderBy
699 -> Maybe Text -- full text search
700 -> m (Versioned NgramsTable)
701 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
702 ns <- selectNodesWithUsername NodeList userMaster
703 let ngramsType = ngramsTypeFromTabType tabType
704 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
705 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
706 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
707
708
709
710 apiNgramsTableCorpus :: ( GargServerC env err m
711 )
712 => NodeId -> ServerT TableNgramsApi m
713 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
714 :<|> tableNgramsPut
715 :<|> scoresRecomputeTableNgrams cId
716 :<|> getTableNgramsVersion cId
717 :<|> apiNgramsAsync cId
718
719 apiNgramsTableDoc :: ( GargServerC env err m
720 )
721 => DocId -> ServerT TableNgramsApi m
722 apiNgramsTableDoc dId = getTableNgramsDoc dId
723 :<|> tableNgramsPut
724 :<|> scoresRecomputeTableNgrams dId
725 :<|> getTableNgramsVersion dId
726 :<|> apiNgramsAsync dId
727 -- > index all the corpus accordingly (TODO AD)
728
729 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
730 apiNgramsAsync _dId =
731 serveJobsAPI $
732 JobFunction (\i l ->
733 let
734 log' x = do
735 printDebug "tableNgramsPutAsync" x
736 liftBase $ l x
737 in tableNgramsPutAsync i log')
738
739 -- Did the given list of ngrams changed since the given version?
740 -- The returned value is versioned boolean value, meaning that one always retrieve the
741 -- latest version.
742 -- If the given version is negative then one simply receive the latest version and True.
743 -- Using this function is more precise than simply comparing the latest version number
744 -- with the local version number. Indeed there might be no change to this particular list
745 -- and still the version number has changed because of other lists.
746 --
747 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
748 -- * currentVersion: good computation, good bandwidth, bad precision.
749 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
750 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
751 listNgramsChangedSince :: RepoCmdM env err m
752 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
753 listNgramsChangedSince listId ngramsType version
754 | version < 0 =
755 Versioned <$> currentVersion <*> pure True
756 | otherwise =
757 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)