]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Merge branch 'dev-charts-update-economy' of ssh://gitlab.iscpif.fr:20022/gargantext...
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsStatePatch
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepo
56 , NgramsRepoElement(..)
57 , saveRepo
58 , initRepo
59
60 , RepoEnv(..)
61 , renv_var
62 , renv_lock
63
64 , TabType(..)
65
66 , HasRepoVar(..)
67 , HasRepoSaver(..)
68 , HasRepo(..)
69 , RepoCmdM
70 , QueryParamR
71 , TODO
72
73 -- Internals
74 , getNgramsTableMap
75 , dumpJsonTableMap
76 , tableNgramsPull
77 , tableNgramsPut
78
79 , Version
80 , Versioned(..)
81 , currentVersion
82 , listNgramsChangedSince
83 )
84 where
85
86 import Control.Concurrent
87 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped)
88 import Control.Monad.Reader
89 import Data.Aeson hiding ((.=))
90 import qualified Data.Aeson.Text as DAT
91 import Data.Either (Either(..))
92 import Data.Foldable
93 import qualified Data.List as List
94 import Data.Map.Strict (Map)
95 import qualified Data.Map.Strict as Map
96 import qualified Data.Map.Strict.Patch as PM
97 import Data.Maybe (fromMaybe)
98 import Data.Monoid
99 import Data.Ord (Down(..))
100 import Data.Patch.Class (Action(act), Transformable(..), ours)
101 import qualified Data.Set as S
102 import qualified Data.Set as Set
103 import Data.Swagger hiding (version, patch)
104 import Data.Text (Text, isInfixOf, unpack)
105 import Data.Text.Lazy.IO as DTL
106 import Formatting (hprint, int, (%))
107 import Formatting.Clock (timeSpecs)
108 import GHC.Generics (Generic)
109 import Servant hiding (Patch)
110 import System.Clock (getTime, TimeSpec, Clock(..))
111 import System.IO (stderr)
112 import Test.QuickCheck (elements)
113 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
114
115 import Prelude (error)
116 import Gargantext.Prelude
117
118 import Gargantext.API.Admin.Types (HasSettings)
119 import qualified Gargantext.API.Metrics as Metrics
120 import Gargantext.API.Ngrams.Types
121 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, TODO, assertValid)
122 import Gargantext.Core.Utils (something)
123 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
124 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
125 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
126 import Gargantext.Database.Query.Table.Node.Select
127 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
128 import Gargantext.Database.Admin.Config (userMaster)
129 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
130 import Gargantext.Database.Admin.Types.Node (NodeType(..))
131 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
132 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
133 import Gargantext.Database.Query.Table.Node (getNode)
134 import Gargantext.Database.Schema.Node (NodePoly(..))
135
136 {-
137 -- TODO sequences of modifications (Patchs)
138 type NgramsIdPatch = Patch NgramsId NgramsPatch
139
140 ngramsPatch :: Int -> NgramsPatch
141 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
142
143 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
144 toEdit n p = Edit n p
145 ngramsIdPatch :: Patch NgramsId NgramsPatch
146 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
147 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
148 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
149 ]
150
151 -- applyPatchBack :: Patch -> IO Patch
152 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
153 -}
154 ------------------------------------------------------------------------
155 ------------------------------------------------------------------------
156 ------------------------------------------------------------------------
157
158 {-
159 -- TODO: Replace.old is ignored which means that if the current list
160 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
161 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
162 -- However this should not happen in non conflicting situations.
163 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
164 mkListsUpdate nt patches =
165 [ (ngramsTypeId nt, ng, listTypeId lt)
166 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
167 , lt <- patch ^.. patch_list . new
168 ]
169
170 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
171 -> NgramsType
172 -> NgramsTablePatch
173 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
174 mkChildrenGroups addOrRem nt patches =
175 [ (ngramsTypeId nt, parent, child)
176 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
177 , child <- patch ^.. patch_children . to addOrRem . folded
178 ]
179 -}
180
181 ------------------------------------------------------------------------
182
183 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
184 => m ()
185 saveRepo = liftBase =<< view repoSaver
186
187 listTypeConflictResolution :: ListType -> ListType -> ListType
188 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
189
190 ngramsStatePatchConflictResolution
191 :: TableNgrams.NgramsType
192 -> NodeId
193 -> NgramsTerm
194 -> ConflictResolutionNgramsPatch
195 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
196 = (ours, (const ours, ours), (False, False))
197 -- (False, False) mean here that Mod has always priority.
198 -- (True, False) <- would mean priority to the left (same as ours).
199
200 -- undefined {- TODO think this through -}, listTypeConflictResolution)
201
202 -- Current state:
203 -- Insertions are not considered as patches,
204 -- they do not extend history,
205 -- they do not bump version.
206 insertNewOnly :: a -> Maybe b -> a
207 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
208 -- TODO error handling
209
210 {- unused
211 -- TODO refactor with putListNgrams
212 copyListNgrams :: RepoCmdM env err m
213 => NodeId -> NodeId -> NgramsType
214 -> m ()
215 copyListNgrams srcListId dstListId ngramsType = do
216 var <- view repoVar
217 liftBase $ modifyMVar_ var $
218 pure . (r_state . at ngramsType %~ (Just . f . something))
219 saveRepo
220 where
221 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
222 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
223
224 -- TODO refactor with putListNgrams
225 -- The list must be non-empty!
226 -- The added ngrams must be non-existent!
227 addListNgrams :: RepoCmdM env err m
228 => NodeId -> NgramsType
229 -> [NgramsElement] -> m ()
230 addListNgrams listId ngramsType nes = do
231 var <- view repoVar
232 liftBase $ modifyMVar_ var $
233 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
234 saveRepo
235 where
236 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
237 -}
238
239 -- UNSAFE
240 rmListNgrams :: RepoCmdM env err m
241 => ListId
242 -> TableNgrams.NgramsType
243 -> m ()
244 rmListNgrams l nt = setListNgrams l nt mempty
245
246 -- | TODO: incr the Version number
247 -- && should use patch
248 -- UNSAFE
249 setListNgrams :: RepoCmdM env err m
250 => NodeId
251 -> TableNgrams.NgramsType
252 -> Map NgramsTerm NgramsRepoElement
253 -> m ()
254 setListNgrams listId ngramsType ns = do
255 var <- view repoVar
256 liftBase $ modifyMVar_ var $
257 pure . ( r_state
258 . at ngramsType %~
259 (Just .
260 (at listId .~ ( Just ns))
261 . something
262 )
263 )
264 saveRepo
265
266
267 currentVersion :: RepoCmdM env err m
268 => m Version
269 currentVersion = do
270 var <- view repoVar
271 r <- liftBase $ readMVar var
272 pure $ r ^. r_version
273
274
275 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
276 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
277 commitStatePatch (Versioned p_version p) = do
278 var <- view repoVar
279 vq' <- liftBase $ modifyMVar var $ \r -> do
280 let
281 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
282 (p', q') = transformWith ngramsStatePatchConflictResolution p q
283 r' = r & r_version +~ 1
284 & r_state %~ act p'
285 & r_history %~ (p' :)
286 {-
287 -- Ideally we would like to check these properties. However:
288 -- * They should be checked only to debug the code. The client data
289 -- should be able to trigger these.
290 -- * What kind of error should they throw (we are in IO here)?
291 -- * Should we keep modifyMVar?
292 -- * Should we throw the validation in an Exception, catch it around
293 -- modifyMVar and throw it back as an Error?
294 assertValid $ transformable p q
295 assertValid $ applicable p' (r ^. r_state)
296 -}
297 pure (r', Versioned (r' ^. r_version) q')
298
299 saveRepo
300 pure vq'
301
302 -- This is a special case of tableNgramsPut where the input patch is empty.
303 tableNgramsPull :: RepoCmdM env err m
304 => ListId
305 -> TableNgrams.NgramsType
306 -> Version
307 -> m (Versioned NgramsTablePatch)
308 tableNgramsPull listId ngramsType p_version = do
309 var <- view repoVar
310 r <- liftBase $ readMVar var
311
312 let
313 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
314 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
315
316 pure (Versioned (r ^. r_version) q_table)
317
318 -- Apply the given patch to the DB and returns the patch to be applied on the
319 -- client.
320 -- TODO-ACCESS check
321 tableNgramsPut :: (HasNodeError err,
322 HasInvalidError err,
323 HasConfig env,
324 HasConnectionPool env,
325 HasSettings env,
326 RepoCmdM env err m)
327 => TabType
328 -> ListId
329 -> Versioned NgramsTablePatch
330 -> m (Versioned NgramsTablePatch)
331 tableNgramsPut tabType listId (Versioned p_version p_table)
332 | p_table == mempty = do
333 let ngramsType = ngramsTypeFromTabType tabType
334 tableNgramsPull listId ngramsType p_version
335
336 | otherwise = do
337 let ngramsType = ngramsTypeFromTabType tabType
338 (p0, p0_validity) = PM.singleton listId p_table
339 (p, p_validity) = PM.singleton ngramsType p0
340
341 assertValid p0_validity
342 assertValid p_validity
343
344 ret <- commitStatePatch (Versioned p_version p)
345 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
346
347 node <- getNode listId
348 let nId = _node_id node
349 _uId = _node_userId node
350 mCId = _node_parentId node
351 -- printDebug "[tableNgramsPut] updating graph with nId" nId
352 -- printDebug "[tableNgramsPut] updating graph with uId" uId
353 -- _ <- recomputeGraph uId nId Conditional
354
355 printDebug "[tableNgramsPut] tabType" tabType
356 printDebug "[tableNgramsPut] listId" listId
357
358 _ <- case mCId of
359 Nothing -> do
360 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
361 pure ()
362 Just cId -> do
363 case tabType of
364 Authors -> do
365 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
366 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
367 pure ()
368 Institutes -> do
369 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
370 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
371 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
372 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
373 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
374 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
375 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
376 pure ()
377 Sources -> do
378 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
379 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
380 pure ()
381 Terms -> do
382 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
383 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
384 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
385 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
386 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
387 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
388 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
389 pure ()
390 _ -> do
391 printDebug "[tableNgramsPut] no update for tabType = " tabType
392 pure ()
393 pure ()
394
395 pure ret
396
397 {-
398 { _ne_list :: ListType
399 If we merge the parents/children we can potentially create cycles!
400 , _ne_parent :: Maybe NgramsTerm
401 , _ne_children :: MSet NgramsTerm
402 }
403 -}
404
405 getNgramsTableMap :: RepoCmdM env err m
406 => NodeId
407 -> TableNgrams.NgramsType
408 -> m (Versioned NgramsTableMap)
409 getNgramsTableMap nodeId ngramsType = do
410 v <- view repoVar
411 repo <- liftBase $ readMVar v
412 pure $ Versioned (repo ^. r_version)
413 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
414
415 dumpJsonTableMap :: RepoCmdM env err m
416 => Text
417 -> NodeId
418 -> TableNgrams.NgramsType
419 -> m ()
420 dumpJsonTableMap fpath nodeId ngramsType = do
421 m <- getNgramsTableMap nodeId ngramsType
422 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
423 pure ()
424
425 type MinSize = Int
426 type MaxSize = Int
427
428 -- | TODO Errors management
429 -- TODO: polymorphic for Annuaire or Corpus or ...
430 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
431 -- TODO: should take only one ListId
432
433 getTime' :: MonadBase IO m => m TimeSpec
434 getTime' = liftBase $ getTime ProcessCPUTime
435
436
437 getTableNgrams :: forall env err m.
438 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
439 => NodeType -> NodeId -> TabType
440 -> ListId -> Limit -> Maybe Offset
441 -> Maybe ListType
442 -> Maybe MinSize -> Maybe MaxSize
443 -> Maybe OrderBy
444 -> (NgramsTerm -> Bool)
445 -> m (Versioned NgramsTable)
446 getTableNgrams _nType nId tabType listId limit_ offset
447 listType minSize maxSize orderBy searchQuery = do
448
449 t0 <- getTime'
450 -- lIds <- selectNodesWithUsername NodeList userMaster
451 let
452 ngramsType = ngramsTypeFromTabType tabType
453 offset' = maybe 0 identity offset
454 listType' = maybe (const True) (==) listType
455 minSize' = maybe (const True) (<=) minSize
456 maxSize' = maybe (const True) (>=) maxSize
457
458 selected_node n = minSize' s
459 && maxSize' s
460 && searchQuery (n ^. ne_ngrams)
461 && listType' (n ^. ne_list)
462 where
463 s = n ^. ne_size
464
465 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
466
467 ---------------------------------------
468 sortOnOrder Nothing = identity
469 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
470 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
471 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
472 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
473
474 ---------------------------------------
475 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
476 selectAndPaginate tableMap = roots <> inners
477 where
478 list = tableMap ^.. each
479 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
480 (ne ^. ne_root)
481 selected_nodes = list & take limit_
482 . drop offset'
483 . filter selected_node
484 . sortOnOrder orderBy
485 roots = rootOf <$> selected_nodes
486 rootsSet = Set.fromList (_ne_ngrams <$> roots)
487 inners = list & filter (selected_inner rootsSet)
488
489 ---------------------------------------
490 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
491 setScores False table = pure table
492 setScores True table = do
493 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
494 t1 <- getTime'
495 occurrences <- getOccByNgramsOnlyFast' nId
496 listId
497 ngramsType
498 ngrams_terms
499 t2 <- getTime'
500 liftBase $ hprint stderr
501 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
502 (length ngrams_terms) t1 t2
503 {-
504 occurrences <- getOccByNgramsOnlySlow nType nId
505 (lIds <> [listId])
506 ngramsType
507 ngrams_terms
508 -}
509 let
510 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
511
512 pure $ table & each %~ setOcc
513 ---------------------------------------
514
515 -- lists <- catMaybes <$> listsWith userMaster
516 -- trace (show lists) $
517 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
518
519 let scoresNeeded = needsScores orderBy
520 tableMap1 <- getNgramsTableMap listId ngramsType
521 t1 <- getTime'
522 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
523 . Map.mapWithKey ngramsElementFromRepo
524 t2 <- getTime'
525 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
526 . setScores (not scoresNeeded)
527 . selectAndPaginate
528 t3 <- getTime'
529 liftBase $ hprint stderr
530 ("getTableNgrams total=" % timeSpecs
531 % " map1=" % timeSpecs
532 % " map2=" % timeSpecs
533 % " map3=" % timeSpecs
534 % " sql=" % (if scoresNeeded then "map2" else "map3")
535 % "\n"
536 ) t0 t3 t0 t1 t1 t2 t2 t3
537 pure tableMap3
538
539
540 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
541 scoresRecomputeTableNgrams nId tabType listId = do
542 tableMap <- getNgramsTableMap listId ngramsType
543 _ <- tableMap & v_data %%~ setScores
544 . Map.mapWithKey ngramsElementFromRepo
545
546 pure $ 1
547 where
548 ngramsType = ngramsTypeFromTabType tabType
549
550 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
551 setScores table = do
552 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
553 occurrences <- getOccByNgramsOnlyFast' nId
554 listId
555 ngramsType
556 ngrams_terms
557 let
558 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
559
560 pure $ table & each %~ setOcc
561
562
563
564 -- APIs
565
566 -- TODO: find a better place for the code above, All APIs stay here
567
568 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
569 deriving (Generic, Enum, Bounded, Read, Show)
570
571 instance FromHttpApiData OrderBy
572 where
573 parseUrlPiece "TermAsc" = pure TermAsc
574 parseUrlPiece "TermDesc" = pure TermDesc
575 parseUrlPiece "ScoreAsc" = pure ScoreAsc
576 parseUrlPiece "ScoreDesc" = pure ScoreDesc
577 parseUrlPiece _ = Left "Unexpected value of OrderBy"
578
579
580 instance ToParamSchema OrderBy
581 instance FromJSON OrderBy
582 instance ToJSON OrderBy
583 instance ToSchema OrderBy
584 instance Arbitrary OrderBy
585 where
586 arbitrary = elements [minBound..maxBound]
587
588 needsScores :: Maybe OrderBy -> Bool
589 needsScores (Just ScoreAsc) = True
590 needsScores (Just ScoreDesc) = True
591 needsScores _ = False
592
593 type TableNgramsApiGet = Summary " Table Ngrams API Get"
594 :> QueryParamR "ngramsType" TabType
595 :> QueryParamR "list" ListId
596 :> QueryParamR "limit" Limit
597 :> QueryParam "offset" Offset
598 :> QueryParam "listType" ListType
599 :> QueryParam "minTermSize" MinSize
600 :> QueryParam "maxTermSize" MaxSize
601 :> QueryParam "orderBy" OrderBy
602 :> QueryParam "search" Text
603 :> Get '[JSON] (Versioned NgramsTable)
604
605 type TableNgramsApiPut = Summary " Table Ngrams API Change"
606 :> QueryParamR "ngramsType" TabType
607 :> QueryParamR "list" ListId
608 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
609 :> Put '[JSON] (Versioned NgramsTablePatch)
610
611 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
612 :> QueryParamR "ngramsType" TabType
613 :> QueryParamR "list" ListId
614 :> "recompute" :> Post '[JSON] Int
615
616 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
617 :> QueryParamR "ngramsType" TabType
618 :> QueryParamR "list" ListId
619 :> Get '[JSON] Version
620
621 type TableNgramsApi = TableNgramsApiGet
622 :<|> TableNgramsApiPut
623 :<|> RecomputeScoresNgramsApiGet
624 :<|> "version" :> TableNgramsApiGetVersion
625
626 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
627 => NodeId
628 -> TabType
629 -> ListId
630 -> Limit
631 -> Maybe Offset
632 -> Maybe ListType
633 -> Maybe MinSize -> Maybe MaxSize
634 -> Maybe OrderBy
635 -> Maybe Text -- full text search
636 -> m (Versioned NgramsTable)
637 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
638 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
639 where
640 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
641
642 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
643 => NodeId
644 -> TabType
645 -> ListId
646 -> m Version
647 getTableNgramsVersion _nId _tabType _listId = currentVersion
648 -- TODO: limit?
649 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
650 -- This line above looks like a waste of computation to finally get only the version.
651 -- See the comment about listNgramsChangedSince.
652
653
654 -- | Text search is deactivated for now for ngrams by doc only
655 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
656 => DocId -> TabType
657 -> ListId -> Limit -> Maybe Offset
658 -> Maybe ListType
659 -> Maybe MinSize -> Maybe MaxSize
660 -> Maybe OrderBy
661 -> Maybe Text -- full text search
662 -> m (Versioned NgramsTable)
663 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
664 ns <- selectNodesWithUsername NodeList userMaster
665 let ngramsType = ngramsTypeFromTabType tabType
666 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
667 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
668 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
669
670
671
672 apiNgramsTableCorpus :: ( RepoCmdM env err m
673 , HasNodeError err
674 , HasInvalidError err
675 , HasConnectionPool env
676 , HasConfig env
677 , HasSettings env
678 )
679 => NodeId -> ServerT TableNgramsApi m
680 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
681 :<|> tableNgramsPut
682 :<|> scoresRecomputeTableNgrams cId
683 :<|> getTableNgramsVersion cId
684
685 apiNgramsTableDoc :: ( RepoCmdM env err m
686 , HasNodeError err
687 , HasInvalidError err
688 , HasConnectionPool env
689 , HasConfig env
690 , HasSettings env
691 )
692 => DocId -> ServerT TableNgramsApi m
693 apiNgramsTableDoc dId = getTableNgramsDoc dId
694 :<|> tableNgramsPut
695 :<|> scoresRecomputeTableNgrams dId
696 :<|> getTableNgramsVersion dId
697 -- > index all the corpus accordingly (TODO AD)
698
699 -- Did the given list of ngrams changed since the given version?
700 -- The returned value is versioned boolean value, meaning that one always retrieve the
701 -- latest version.
702 -- If the given version is negative then one simply receive the latest version and True.
703 -- Using this function is more precise than simply comparing the latest version number
704 -- with the local version number. Indeed there might be no change to this particular list
705 -- and still the version number has changed because of other lists.
706 --
707 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
708 -- * currentVersion: good computation, good bandwidth, bad precision.
709 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
710 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
711 listNgramsChangedSince :: RepoCmdM env err m
712 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
713 listNgramsChangedSince listId ngramsType version
714 | version < 0 =
715 Versioned <$> currentVersion <*> pure True
716 | otherwise =
717 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)