]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[Doc] Proxemy vars
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsStatePatch
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepo
56 , NgramsRepoElement(..)
57 , saveRepo
58 , initRepo
59
60 , RepoEnv(..)
61 , renv_var
62 , renv_lock
63
64 , TabType(..)
65
66 , HasRepoVar(..)
67 , HasRepoSaver(..)
68 , HasRepo(..)
69 , RepoCmdM
70 , QueryParamR
71 , TODO
72
73 -- Internals
74 , getNgramsTableMap
75 , dumpJsonTableMap
76 , tableNgramsPull
77 , tableNgramsPut
78
79 , Version
80 , Versioned(..)
81 , currentVersion
82 , listNgramsChangedSince
83 )
84 where
85
86 import Control.Concurrent
87 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped)
88 import Control.Monad.Reader
89 import Data.Aeson hiding ((.=))
90 import qualified Data.Aeson.Text as DAT
91 import Data.Either (Either(..))
92 import Data.Foldable
93 import qualified Data.List as List
94 import Data.Map.Strict (Map)
95 import qualified Data.Map.Strict as Map
96 import qualified Data.Map.Strict.Patch as PM
97 import Data.Maybe (fromMaybe)
98 import Data.Monoid
99 import Data.Ord (Down(..))
100 import Data.Patch.Class (Action(act), Transformable(..), ours)
101 import qualified Data.Set as S
102 import qualified Data.Set as Set
103 import Data.Swagger hiding (version, patch)
104 import Data.Text (Text, isInfixOf, unpack)
105 import Data.Text.Lazy.IO as DTL
106 import Formatting (hprint, int, (%))
107 import Formatting.Clock (timeSpecs)
108 import GHC.Generics (Generic)
109 import Servant hiding (Patch)
110 import System.Clock (getTime, TimeSpec, Clock(..))
111 import System.IO (stderr)
112 import Test.QuickCheck (elements)
113 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
114
115 import Prelude (error)
116 import Gargantext.Prelude
117
118 import Gargantext.API.Admin.Types (HasSettings)
119 import qualified Gargantext.API.Metrics as Metrics
120 import Gargantext.API.Ngrams.Types
121 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, TODO, assertValid)
122 import Gargantext.Core.Utils (something)
123 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
124 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
125 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
126 import Gargantext.Database.Query.Table.Node.Select
127 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
128 import Gargantext.Database.Admin.Config (userMaster)
129 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
130 import Gargantext.Database.Admin.Types.Node (NodeType(..))
131 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
132 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
133 import Gargantext.Database.Query.Table.Node (getNode)
134 import Gargantext.Database.Query.Tree.Error (HasTreeError)
135 import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
136
137 {-
138 -- TODO sequences of modifications (Patchs)
139 type NgramsIdPatch = Patch NgramsId NgramsPatch
140
141 ngramsPatch :: Int -> NgramsPatch
142 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
143
144 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
145 toEdit n p = Edit n p
146 ngramsIdPatch :: Patch NgramsId NgramsPatch
147 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
148 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
149 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
150 ]
151
152 -- applyPatchBack :: Patch -> IO Patch
153 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
154 -}
155 ------------------------------------------------------------------------
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158
159 {-
160 -- TODO: Replace.old is ignored which means that if the current list
161 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
162 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
163 -- However this should not happen in non conflicting situations.
164 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
165 mkListsUpdate nt patches =
166 [ (ngramsTypeId nt, ng, listTypeId lt)
167 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
168 , lt <- patch ^.. patch_list . new
169 ]
170
171 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
172 -> NgramsType
173 -> NgramsTablePatch
174 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
175 mkChildrenGroups addOrRem nt patches =
176 [ (ngramsTypeId nt, parent, child)
177 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
178 , child <- patch ^.. patch_children . to addOrRem . folded
179 ]
180 -}
181
182 ------------------------------------------------------------------------
183
184 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
185 => m ()
186 saveRepo = liftBase =<< view repoSaver
187
188 listTypeConflictResolution :: ListType -> ListType -> ListType
189 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
190
191 ngramsStatePatchConflictResolution
192 :: TableNgrams.NgramsType
193 -> NodeId
194 -> NgramsTerm
195 -> ConflictResolutionNgramsPatch
196 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
197 = (ours, (const ours, ours), (False, False))
198 -- (False, False) mean here that Mod has always priority.
199 -- (True, False) <- would mean priority to the left (same as ours).
200
201 -- undefined {- TODO think this through -}, listTypeConflictResolution)
202
203 -- Current state:
204 -- Insertions are not considered as patches,
205 -- they do not extend history,
206 -- they do not bump version.
207 insertNewOnly :: a -> Maybe b -> a
208 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
209 -- TODO error handling
210
211 {- unused
212 -- TODO refactor with putListNgrams
213 copyListNgrams :: RepoCmdM env err m
214 => NodeId -> NodeId -> NgramsType
215 -> m ()
216 copyListNgrams srcListId dstListId ngramsType = do
217 var <- view repoVar
218 liftBase $ modifyMVar_ var $
219 pure . (r_state . at ngramsType %~ (Just . f . something))
220 saveRepo
221 where
222 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
223 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
224
225 -- TODO refactor with putListNgrams
226 -- The list must be non-empty!
227 -- The added ngrams must be non-existent!
228 addListNgrams :: RepoCmdM env err m
229 => NodeId -> NgramsType
230 -> [NgramsElement] -> m ()
231 addListNgrams listId ngramsType nes = do
232 var <- view repoVar
233 liftBase $ modifyMVar_ var $
234 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
235 saveRepo
236 where
237 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
238 -}
239
240 -- UNSAFE
241 rmListNgrams :: RepoCmdM env err m
242 => ListId
243 -> TableNgrams.NgramsType
244 -> m ()
245 rmListNgrams l nt = setListNgrams l nt mempty
246
247 -- | TODO: incr the Version number
248 -- && should use patch
249 -- UNSAFE
250 setListNgrams :: RepoCmdM env err m
251 => NodeId
252 -> TableNgrams.NgramsType
253 -> Map NgramsTerm NgramsRepoElement
254 -> m ()
255 setListNgrams listId ngramsType ns = do
256 var <- view repoVar
257 liftBase $ modifyMVar_ var $
258 pure . ( r_state
259 . at ngramsType %~
260 (Just .
261 (at listId .~ ( Just ns))
262 . something
263 )
264 )
265 saveRepo
266
267
268 currentVersion :: RepoCmdM env err m
269 => m Version
270 currentVersion = do
271 var <- view repoVar
272 r <- liftBase $ readMVar var
273 pure $ r ^. r_version
274
275
276 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
277 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
278 commitStatePatch (Versioned p_version p) = do
279 var <- view repoVar
280 vq' <- liftBase $ modifyMVar var $ \r -> do
281 let
282 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
283 (p', q') = transformWith ngramsStatePatchConflictResolution p q
284 r' = r & r_version +~ 1
285 & r_state %~ act p'
286 & r_history %~ (p' :)
287 {-
288 -- Ideally we would like to check these properties. However:
289 -- * They should be checked only to debug the code. The client data
290 -- should be able to trigger these.
291 -- * What kind of error should they throw (we are in IO here)?
292 -- * Should we keep modifyMVar?
293 -- * Should we throw the validation in an Exception, catch it around
294 -- modifyMVar and throw it back as an Error?
295 assertValid $ transformable p q
296 assertValid $ applicable p' (r ^. r_state)
297 -}
298 pure (r', Versioned (r' ^. r_version) q')
299
300 saveRepo
301 pure vq'
302
303 -- This is a special case of tableNgramsPut where the input patch is empty.
304 tableNgramsPull :: RepoCmdM env err m
305 => ListId
306 -> TableNgrams.NgramsType
307 -> Version
308 -> m (Versioned NgramsTablePatch)
309 tableNgramsPull listId ngramsType p_version = do
310 var <- view repoVar
311 r <- liftBase $ readMVar var
312
313 let
314 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
315 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
316
317 pure (Versioned (r ^. r_version) q_table)
318
319 -- Apply the given patch to the DB and returns the patch to be applied on the
320 -- client.
321 -- TODO-ACCESS check
322 tableNgramsPut :: ( HasNodeError err
323 , HasTreeError err
324 , HasInvalidError err
325 , HasConfig env
326 , HasConnectionPool env
327 , HasSettings env
328 , RepoCmdM env err m
329 )
330 => TabType
331 -> ListId
332 -> Versioned NgramsTablePatch
333 -> m (Versioned NgramsTablePatch)
334 tableNgramsPut tabType listId (Versioned p_version p_table)
335 | p_table == mempty = do
336 let ngramsType = ngramsTypeFromTabType tabType
337 tableNgramsPull listId ngramsType p_version
338
339 | otherwise = do
340 let ngramsType = ngramsTypeFromTabType tabType
341 (p0, p0_validity) = PM.singleton listId p_table
342 (p, p_validity) = PM.singleton ngramsType p0
343
344 assertValid p0_validity
345 assertValid p_validity
346
347 ret <- commitStatePatch (Versioned p_version p)
348 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
349
350 node <- getNode listId
351 let nId = node ^. node_id
352 _uId = node ^. node_userId
353 mCId = node ^. node_parentId
354 -- printDebug "[tableNgramsPut] updating graph with nId" nId
355 -- printDebug "[tableNgramsPut] updating graph with uId" uId
356 -- _ <- recomputeGraph uId nId Conditional
357
358 printDebug "[tableNgramsPut] tabType" tabType
359 printDebug "[tableNgramsPut] listId" listId
360
361 _ <- case mCId of
362 Nothing -> do
363 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
364 pure ()
365 Just cId -> do
366 case tabType of
367 Authors -> do
368 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
369 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
370 pure ()
371 Institutes -> do
372 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
373 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
374 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
375 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
376 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
377 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
378 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
379 pure ()
380 Sources -> do
381 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
382 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
383 pure ()
384 Terms -> do
385 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
386 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
387 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
388 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
389 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
390 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
391 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
392 pure ()
393 _ -> do
394 printDebug "[tableNgramsPut] no update for tabType = " tabType
395 pure ()
396 pure ()
397 pure ret
398
399 {-
400 { _ne_list :: ListType
401 If we merge the parents/children we can potentially create cycles!
402 , _ne_parent :: Maybe NgramsTerm
403 , _ne_children :: MSet NgramsTerm
404 }
405 -}
406
407 getNgramsTableMap :: RepoCmdM env err m
408 => NodeId
409 -> TableNgrams.NgramsType
410 -> m (Versioned NgramsTableMap)
411 getNgramsTableMap nodeId ngramsType = do
412 v <- view repoVar
413 repo <- liftBase $ readMVar v
414 pure $ Versioned (repo ^. r_version)
415 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
416
417 dumpJsonTableMap :: RepoCmdM env err m
418 => Text
419 -> NodeId
420 -> TableNgrams.NgramsType
421 -> m ()
422 dumpJsonTableMap fpath nodeId ngramsType = do
423 m <- getNgramsTableMap nodeId ngramsType
424 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
425 pure ()
426
427 type MinSize = Int
428 type MaxSize = Int
429
430 -- | TODO Errors management
431 -- TODO: polymorphic for Annuaire or Corpus or ...
432 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
433 -- TODO: should take only one ListId
434
435 getTime' :: MonadBase IO m => m TimeSpec
436 getTime' = liftBase $ getTime ProcessCPUTime
437
438
439 getTableNgrams :: forall env err m.
440 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
441 => NodeType -> NodeId -> TabType
442 -> ListId -> Limit -> Maybe Offset
443 -> Maybe ListType
444 -> Maybe MinSize -> Maybe MaxSize
445 -> Maybe OrderBy
446 -> (NgramsTerm -> Bool)
447 -> m (Versioned NgramsTable)
448 getTableNgrams _nType nId tabType listId limit_ offset
449 listType minSize maxSize orderBy searchQuery = do
450
451 t0 <- getTime'
452 -- lIds <- selectNodesWithUsername NodeList userMaster
453 let
454 ngramsType = ngramsTypeFromTabType tabType
455 offset' = maybe 0 identity offset
456 listType' = maybe (const True) (==) listType
457 minSize' = maybe (const True) (<=) minSize
458 maxSize' = maybe (const True) (>=) maxSize
459
460 selected_node n = minSize' s
461 && maxSize' s
462 && searchQuery (n ^. ne_ngrams)
463 && listType' (n ^. ne_list)
464 where
465 s = n ^. ne_size
466
467 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
468
469 ---------------------------------------
470 sortOnOrder Nothing = identity
471 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
472 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
473 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
474 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
475
476 ---------------------------------------
477 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
478 selectAndPaginate tableMap = roots <> inners
479 where
480 list = tableMap ^.. each
481 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
482 (ne ^. ne_root)
483 selected_nodes = list & take limit_
484 . drop offset'
485 . filter selected_node
486 . sortOnOrder orderBy
487 roots = rootOf <$> selected_nodes
488 rootsSet = Set.fromList (_ne_ngrams <$> roots)
489 inners = list & filter (selected_inner rootsSet)
490
491 ---------------------------------------
492 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
493 setScores False table = pure table
494 setScores True table = do
495 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
496 t1 <- getTime'
497 occurrences <- getOccByNgramsOnlyFast' nId
498 listId
499 ngramsType
500 ngrams_terms
501 t2 <- getTime'
502 liftBase $ hprint stderr
503 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
504 (length ngrams_terms) t1 t2
505 {-
506 occurrences <- getOccByNgramsOnlySlow nType nId
507 (lIds <> [listId])
508 ngramsType
509 ngrams_terms
510 -}
511 let
512 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
513
514 pure $ table & each %~ setOcc
515 ---------------------------------------
516
517 -- lists <- catMaybes <$> listsWith userMaster
518 -- trace (show lists) $
519 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
520
521 let scoresNeeded = needsScores orderBy
522 tableMap1 <- getNgramsTableMap listId ngramsType
523 t1 <- getTime'
524 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
525 . Map.mapWithKey ngramsElementFromRepo
526 t2 <- getTime'
527 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
528 . setScores (not scoresNeeded)
529 . selectAndPaginate
530 t3 <- getTime'
531 liftBase $ hprint stderr
532 ("getTableNgrams total=" % timeSpecs
533 % " map1=" % timeSpecs
534 % " map2=" % timeSpecs
535 % " map3=" % timeSpecs
536 % " sql=" % (if scoresNeeded then "map2" else "map3")
537 % "\n"
538 ) t0 t3 t0 t1 t1 t2 t2 t3
539 pure tableMap3
540
541
542 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
543 scoresRecomputeTableNgrams nId tabType listId = do
544 tableMap <- getNgramsTableMap listId ngramsType
545 _ <- tableMap & v_data %%~ setScores
546 . Map.mapWithKey ngramsElementFromRepo
547
548 pure $ 1
549 where
550 ngramsType = ngramsTypeFromTabType tabType
551
552 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
553 setScores table = do
554 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
555 occurrences <- getOccByNgramsOnlyFast' nId
556 listId
557 ngramsType
558 ngrams_terms
559 let
560 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
561
562 pure $ table & each %~ setOcc
563
564
565
566 -- APIs
567
568 -- TODO: find a better place for the code above, All APIs stay here
569
570 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
571 deriving (Generic, Enum, Bounded, Read, Show)
572
573 instance FromHttpApiData OrderBy
574 where
575 parseUrlPiece "TermAsc" = pure TermAsc
576 parseUrlPiece "TermDesc" = pure TermDesc
577 parseUrlPiece "ScoreAsc" = pure ScoreAsc
578 parseUrlPiece "ScoreDesc" = pure ScoreDesc
579 parseUrlPiece _ = Left "Unexpected value of OrderBy"
580
581
582 instance ToParamSchema OrderBy
583 instance FromJSON OrderBy
584 instance ToJSON OrderBy
585 instance ToSchema OrderBy
586 instance Arbitrary OrderBy
587 where
588 arbitrary = elements [minBound..maxBound]
589
590 needsScores :: Maybe OrderBy -> Bool
591 needsScores (Just ScoreAsc) = True
592 needsScores (Just ScoreDesc) = True
593 needsScores _ = False
594
595 type TableNgramsApiGet = Summary " Table Ngrams API Get"
596 :> QueryParamR "ngramsType" TabType
597 :> QueryParamR "list" ListId
598 :> QueryParamR "limit" Limit
599 :> QueryParam "offset" Offset
600 :> QueryParam "listType" ListType
601 :> QueryParam "minTermSize" MinSize
602 :> QueryParam "maxTermSize" MaxSize
603 :> QueryParam "orderBy" OrderBy
604 :> QueryParam "search" Text
605 :> Get '[JSON] (Versioned NgramsTable)
606
607 type TableNgramsApiPut = Summary " Table Ngrams API Change"
608 :> QueryParamR "ngramsType" TabType
609 :> QueryParamR "list" ListId
610 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
611 :> Put '[JSON] (Versioned NgramsTablePatch)
612
613 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
614 :> QueryParamR "ngramsType" TabType
615 :> QueryParamR "list" ListId
616 :> "recompute" :> Post '[JSON] Int
617
618 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
619 :> QueryParamR "ngramsType" TabType
620 :> QueryParamR "list" ListId
621 :> Get '[JSON] Version
622
623 type TableNgramsApi = TableNgramsApiGet
624 :<|> TableNgramsApiPut
625 :<|> RecomputeScoresNgramsApiGet
626 :<|> "version" :> TableNgramsApiGetVersion
627
628 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
629 => NodeId
630 -> TabType
631 -> ListId
632 -> Limit
633 -> Maybe Offset
634 -> Maybe ListType
635 -> Maybe MinSize -> Maybe MaxSize
636 -> Maybe OrderBy
637 -> Maybe Text -- full text search
638 -> m (Versioned NgramsTable)
639 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
640 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
641 where
642 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
643
644 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
645 => NodeId
646 -> TabType
647 -> ListId
648 -> m Version
649 getTableNgramsVersion _nId _tabType _listId = currentVersion
650 -- TODO: limit?
651 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
652 -- This line above looks like a waste of computation to finally get only the version.
653 -- See the comment about listNgramsChangedSince.
654
655
656 -- | Text search is deactivated for now for ngrams by doc only
657 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
658 => DocId -> TabType
659 -> ListId -> Limit -> Maybe Offset
660 -> Maybe ListType
661 -> Maybe MinSize -> Maybe MaxSize
662 -> Maybe OrderBy
663 -> Maybe Text -- full text search
664 -> m (Versioned NgramsTable)
665 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
666 ns <- selectNodesWithUsername NodeList userMaster
667 let ngramsType = ngramsTypeFromTabType tabType
668 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
669 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
670 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
671
672
673
674 apiNgramsTableCorpus :: ( RepoCmdM env err m
675 , HasNodeError err
676 , HasTreeError err
677 , HasInvalidError err
678 , HasConnectionPool env
679 , HasConfig env
680 , HasSettings env
681 )
682 => NodeId -> ServerT TableNgramsApi m
683 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
684 :<|> tableNgramsPut
685 :<|> scoresRecomputeTableNgrams cId
686 :<|> getTableNgramsVersion cId
687
688 apiNgramsTableDoc :: ( RepoCmdM env err m
689 , HasNodeError err
690 , HasTreeError err
691 , HasInvalidError err
692 , HasConnectionPool env
693 , HasConfig env
694 , HasSettings env
695 )
696 => DocId -> ServerT TableNgramsApi m
697 apiNgramsTableDoc dId = getTableNgramsDoc dId
698 :<|> tableNgramsPut
699 :<|> scoresRecomputeTableNgrams dId
700 :<|> getTableNgramsVersion dId
701 -- > index all the corpus accordingly (TODO AD)
702
703 -- Did the given list of ngrams changed since the given version?
704 -- The returned value is versioned boolean value, meaning that one always retrieve the
705 -- latest version.
706 -- If the given version is negative then one simply receive the latest version and True.
707 -- Using this function is more precise than simply comparing the latest version number
708 -- with the local version number. Indeed there might be no change to this particular list
709 -- and still the version number has changed because of other lists.
710 --
711 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
712 -- * currentVersion: good computation, good bandwidth, bad precision.
713 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
714 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
715 listNgramsChangedSince :: RepoCmdM env err m
716 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
717 listNgramsChangedSince listId ngramsType version
718 | version < 0 =
719 Versioned <$> currentVersion <*> pure True
720 | otherwise =
721 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)