2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 module Gargantext.API.Ngrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
56 , NgramsRepoElement(..)
81 , VersionedWithCount(..)
83 , listNgramsChangedSince
87 import Control.Concurrent
88 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
89 import Control.Monad.Reader
90 import Data.Aeson hiding ((.=))
91 import qualified Data.Aeson.Text as DAT
92 import Data.Either (Either(..))
94 import qualified Data.List as List
95 import Data.Map.Strict (Map)
96 import qualified Data.Map.Strict as Map
97 import qualified Data.Map.Strict.Patch as PM
98 import Data.Maybe (fromMaybe)
100 import Data.Ord (Down(..))
101 import Data.Patch.Class (Action(act), Transformable(..), ours)
102 import qualified Data.Set as S
103 import qualified Data.Set as Set
104 import Data.Swagger hiding (version, patch)
105 import Data.Text (Text, isInfixOf, unpack)
106 import Data.Text.Lazy.IO as DTL
107 import Formatting (hprint, int, (%))
108 import Formatting.Clock (timeSpecs)
109 import GHC.Generics (Generic)
110 import Servant hiding (Patch)
111 import System.Clock (getTime, TimeSpec, Clock(..))
112 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
113 import System.IO (stderr)
114 import Test.QuickCheck (elements)
115 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
117 import Prelude (error)
118 import Gargantext.Prelude hiding (log)
120 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
121 import Gargantext.API.Admin.Types (HasSettings)
122 import qualified Gargantext.API.Metrics as Metrics
123 import Gargantext.API.Ngrams.Types
124 import Gargantext.API.Prelude
125 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
126 import Gargantext.Core.Utils (something)
127 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
128 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
129 import Gargantext.Database.Action.Flow.Types
130 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
131 import Gargantext.Database.Admin.Config (userMaster)
132 import Gargantext.Database.Admin.Types.Node (NodeType(..))
133 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
134 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
135 import Gargantext.Database.Query.Table.Node.Select
136 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
137 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
138 import Gargantext.Database.Query.Table.Node (getNode)
139 import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
140 import Gargantext.Prelude.Job
143 -- TODO sequences of modifications (Patchs)
144 type NgramsIdPatch = Patch NgramsId NgramsPatch
146 ngramsPatch :: Int -> NgramsPatch
147 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
149 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
150 toEdit n p = Edit n p
151 ngramsIdPatch :: Patch NgramsId NgramsPatch
152 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
153 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
154 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
157 -- applyPatchBack :: Patch -> IO Patch
158 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
160 ------------------------------------------------------------------------
161 ------------------------------------------------------------------------
162 ------------------------------------------------------------------------
165 -- TODO: Replace.old is ignored which means that if the current list
166 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
167 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
168 -- However this should not happen in non conflicting situations.
169 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
170 mkListsUpdate nt patches =
171 [ (ngramsTypeId nt, ng, listTypeId lt)
172 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
173 , lt <- patch ^.. patch_list . new
176 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
179 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
180 mkChildrenGroups addOrRem nt patches =
181 [ (ngramsTypeId nt, parent, child)
182 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
183 , child <- patch ^.. patch_children . to addOrRem . folded
187 ------------------------------------------------------------------------
189 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
191 saveRepo = liftBase =<< view repoSaver
193 listTypeConflictResolution :: ListType -> ListType -> ListType
194 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
196 ngramsStatePatchConflictResolution
197 :: TableNgrams.NgramsType
200 -> ConflictResolutionNgramsPatch
201 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
202 = (ours, (const ours, ours), (False, False))
203 -- (False, False) mean here that Mod has always priority.
204 -- (True, False) <- would mean priority to the left (same as ours).
206 -- undefined {- TODO think this through -}, listTypeConflictResolution)
209 -- Insertions are not considered as patches,
210 -- they do not extend history,
211 -- they do not bump version.
212 insertNewOnly :: a -> Maybe b -> a
213 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
214 -- TODO error handling
217 -- TODO refactor with putListNgrams
218 copyListNgrams :: RepoCmdM env err m
219 => NodeId -> NodeId -> NgramsType
221 copyListNgrams srcListId dstListId ngramsType = do
223 liftBase $ modifyMVar_ var $
224 pure . (r_state . at ngramsType %~ (Just . f . something))
227 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
228 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
230 -- TODO refactor with putListNgrams
231 -- The list must be non-empty!
232 -- The added ngrams must be non-existent!
233 addListNgrams :: RepoCmdM env err m
234 => NodeId -> NgramsType
235 -> [NgramsElement] -> m ()
236 addListNgrams listId ngramsType nes = do
238 liftBase $ modifyMVar_ var $
239 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
242 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
246 rmListNgrams :: RepoCmdM env err m
248 -> TableNgrams.NgramsType
250 rmListNgrams l nt = setListNgrams l nt mempty
252 -- | TODO: incr the Version number
253 -- && should use patch
255 setListNgrams :: RepoCmdM env err m
257 -> TableNgrams.NgramsType
258 -> Map NgramsTerm NgramsRepoElement
260 setListNgrams listId ngramsType ns = do
262 liftBase $ modifyMVar_ var $
266 (at listId .~ ( Just ns))
273 currentVersion :: RepoCmdM env err m
277 r <- liftBase $ readMVar var
278 pure $ r ^. r_version
280 newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
281 newNgramsFromNgramsStatePatch p =
282 [ text2ngrams (unNgramsTerm n)
283 | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
284 , _ <- np ^.. patch_new . _Just
287 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
288 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
289 commitStatePatch (Versioned p_version p) = do
291 vq' <- liftBase $ modifyMVar var $ \r -> do
293 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
294 (p', q') = transformWith ngramsStatePatchConflictResolution p q
295 r' = r & r_version +~ 1
297 & r_history %~ (p' :)
299 -- Ideally we would like to check these properties. However:
300 -- * They should be checked only to debug the code. The client data
301 -- should be able to trigger these.
302 -- * What kind of error should they throw (we are in IO here)?
303 -- * Should we keep modifyMVar?
304 -- * Should we throw the validation in an Exception, catch it around
305 -- modifyMVar and throw it back as an Error?
306 assertValid $ transformable p q
307 assertValid $ applicable p' (r ^. r_state)
309 pure (r', Versioned (r' ^. r_version) q')
314 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
318 -- This is a special case of tableNgramsPut where the input patch is empty.
319 tableNgramsPull :: RepoCmdM env err m
321 -> TableNgrams.NgramsType
323 -> m (Versioned NgramsTablePatch)
324 tableNgramsPull listId ngramsType p_version = do
326 r <- liftBase $ readMVar var
329 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
330 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
332 pure (Versioned (r ^. r_version) q_table)
334 -- Apply the given patch to the DB and returns the patch to be applied on the
337 tableNgramsPut :: ( FlowCmdM env err m
342 -> Versioned NgramsTablePatch
343 -> m (Versioned NgramsTablePatch)
344 tableNgramsPut tabType listId (Versioned p_version p_table)
345 | p_table == mempty = do
346 let ngramsType = ngramsTypeFromTabType tabType
347 tableNgramsPull listId ngramsType p_version
350 let ngramsType = ngramsTypeFromTabType tabType
351 (p0, p0_validity) = PM.singleton listId p_table
352 (p, p_validity) = PM.singleton ngramsType p0
354 assertValid p0_validity
355 assertValid p_validity
357 ret <- commitStatePatch (Versioned p_version p)
358 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
363 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
367 => UpdateTableNgramsCharts
370 tableNgramsPostChartsAsync utn logStatus = do
371 let tabType = utn ^. utn_tab_type
372 let listId = utn ^. utn_list_id
374 node <- getNode listId
375 let nId = node ^. node_id
376 _uId = node ^. node_userId
377 mCId = node ^. node_parentId
379 printDebug "[tableNgramsPut] tabType" tabType
380 printDebug "[tableNgramsPut] listId" listId
384 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
385 pure $ jobLogFail $ jobLogInit 1
389 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
390 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
392 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
397 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
398 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
399 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
401 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
402 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
404 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
405 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
407 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
412 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
413 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
415 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
420 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
421 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
424 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
426 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
428 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
430 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
432 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
434 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
440 printDebug "[tableNgramsPut] no update for tabType = " tabType
441 pure $ jobLogFail $ jobLogInit 1
444 { _ne_list :: ListType
445 If we merge the parents/children we can potentially create cycles!
446 , _ne_parent :: Maybe NgramsTerm
447 , _ne_children :: MSet NgramsTerm
451 getNgramsTableMap :: RepoCmdM env err m
453 -> TableNgrams.NgramsType
454 -> m (Versioned NgramsTableMap)
455 getNgramsTableMap nodeId ngramsType = do
457 repo <- liftBase $ readMVar v
458 pure $ Versioned (repo ^. r_version)
459 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
461 dumpJsonTableMap :: RepoCmdM env err m
464 -> TableNgrams.NgramsType
466 dumpJsonTableMap fpath nodeId ngramsType = do
467 m <- getNgramsTableMap nodeId ngramsType
468 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
474 -- | TODO Errors management
475 -- TODO: polymorphic for Annuaire or Corpus or ...
476 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
477 -- TODO: should take only one ListId
479 getTime' :: MonadBase IO m => m TimeSpec
480 getTime' = liftBase $ getTime ProcessCPUTime
483 getTableNgrams :: forall env err m.
484 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
485 => NodeType -> NodeId -> TabType
486 -> ListId -> Limit -> Maybe Offset
488 -> Maybe MinSize -> Maybe MaxSize
490 -> (NgramsTerm -> Bool)
491 -> m (VersionedWithCount NgramsTable)
492 getTableNgrams _nType nId tabType listId limit_ offset
493 listType minSize maxSize orderBy searchQuery = do
496 -- lIds <- selectNodesWithUsername NodeList userMaster
498 ngramsType = ngramsTypeFromTabType tabType
499 offset' = maybe 0 identity offset
500 listType' = maybe (const True) (==) listType
501 minSize' = maybe (const True) (<=) minSize
502 maxSize' = maybe (const True) (>=) maxSize
504 selected_node n = minSize' s
506 && searchQuery (n ^. ne_ngrams)
507 && listType' (n ^. ne_list)
511 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
513 ---------------------------------------
514 sortOnOrder Nothing = identity
515 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
516 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
517 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
518 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
520 ---------------------------------------
522 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
523 filteredNodes tableMap = rootOf <$> list & filter selected_node
525 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
527 list = tableMap ^.. each
529 ---------------------------------------
530 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
531 selectAndPaginate tableMap = roots <> inners
533 list = tableMap ^.. each
534 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
536 selected_nodes = list & take limit_
538 . filter selected_node
539 . sortOnOrder orderBy
540 roots = rootOf <$> selected_nodes
541 rootsSet = Set.fromList (_ne_ngrams <$> roots)
542 inners = list & filter (selected_inner rootsSet)
544 ---------------------------------------
545 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
546 setScores False table = pure table
547 setScores True table = do
548 let ngrams_terms = table ^.. each . ne_ngrams
550 occurrences <- getOccByNgramsOnlyFast' nId
555 liftBase $ hprint stderr
556 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
557 (length ngrams_terms) t1 t2
559 occurrences <- getOccByNgramsOnlySlow nType nId
565 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
567 pure $ table & each %~ setOcc
568 ---------------------------------------
570 -- lists <- catMaybes <$> listsWith userMaster
571 -- trace (show lists) $
572 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
575 let scoresNeeded = needsScores orderBy
576 tableMap1 <- getNgramsTableMap listId ngramsType
578 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
579 . Map.mapWithKey ngramsElementFromRepo
581 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
583 let fltrCount = length $ fltr ^. v_data . _NgramsTable
586 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
587 . setScores (not scoresNeeded)
590 liftBase $ hprint stderr
591 ("getTableNgrams total=" % timeSpecs
592 % " map1=" % timeSpecs
593 % " map2=" % timeSpecs
594 % " map3=" % timeSpecs
595 % " sql=" % (if scoresNeeded then "map2" else "map3")
597 ) t0 t3 t0 t1 t1 t2 t2 t3
598 pure $ toVersionedWithCount fltrCount tableMap3
601 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
602 scoresRecomputeTableNgrams nId tabType listId = do
603 tableMap <- getNgramsTableMap listId ngramsType
604 _ <- tableMap & v_data %%~ setScores
605 . Map.mapWithKey ngramsElementFromRepo
609 ngramsType = ngramsTypeFromTabType tabType
611 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
613 let ngrams_terms = table ^.. each . ne_ngrams
614 occurrences <- getOccByNgramsOnlyFast' nId
619 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
621 pure $ table & each %~ setOcc
627 -- TODO: find a better place for the code above, All APIs stay here
629 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
630 deriving (Generic, Enum, Bounded, Read, Show)
632 instance FromHttpApiData OrderBy
634 parseUrlPiece "TermAsc" = pure TermAsc
635 parseUrlPiece "TermDesc" = pure TermDesc
636 parseUrlPiece "ScoreAsc" = pure ScoreAsc
637 parseUrlPiece "ScoreDesc" = pure ScoreDesc
638 parseUrlPiece _ = Left "Unexpected value of OrderBy"
641 instance ToParamSchema OrderBy
642 instance FromJSON OrderBy
643 instance ToJSON OrderBy
644 instance ToSchema OrderBy
645 instance Arbitrary OrderBy
647 arbitrary = elements [minBound..maxBound]
649 needsScores :: Maybe OrderBy -> Bool
650 needsScores (Just ScoreAsc) = True
651 needsScores (Just ScoreDesc) = True
652 needsScores _ = False
654 type TableNgramsApiGet = Summary " Table Ngrams API Get"
655 :> QueryParamR "ngramsType" TabType
656 :> QueryParamR "list" ListId
657 :> QueryParamR "limit" Limit
658 :> QueryParam "offset" Offset
659 :> QueryParam "listType" ListType
660 :> QueryParam "minTermSize" MinSize
661 :> QueryParam "maxTermSize" MaxSize
662 :> QueryParam "orderBy" OrderBy
663 :> QueryParam "search" Text
664 :> Get '[JSON] (VersionedWithCount NgramsTable)
666 type TableNgramsApiPut = Summary " Table Ngrams API Change"
667 :> QueryParamR "ngramsType" TabType
668 :> QueryParamR "list" ListId
669 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
670 :> Put '[JSON] (Versioned NgramsTablePatch)
672 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
673 :> QueryParamR "ngramsType" TabType
674 :> QueryParamR "list" ListId
675 :> "recompute" :> Post '[JSON] Int
677 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
678 :> QueryParamR "ngramsType" TabType
679 :> QueryParamR "list" ListId
680 :> Get '[JSON] Version
682 type TableNgramsApi = TableNgramsApiGet
683 :<|> TableNgramsApiPut
684 :<|> RecomputeScoresNgramsApiGet
685 :<|> "version" :> TableNgramsApiGetVersion
686 :<|> TableNgramsAsyncApi
688 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
692 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
694 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
701 -> Maybe MinSize -> Maybe MaxSize
703 -> Maybe Text -- full text search
704 -> m (VersionedWithCount NgramsTable)
705 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
706 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
708 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
710 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
715 getTableNgramsVersion _nId _tabType _listId = currentVersion
717 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
718 -- This line above looks like a waste of computation to finally get only the version.
719 -- See the comment about listNgramsChangedSince.
722 -- | Text search is deactivated for now for ngrams by doc only
723 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
725 -> ListId -> Limit -> Maybe Offset
727 -> Maybe MinSize -> Maybe MaxSize
729 -> Maybe Text -- full text search
730 -> m (VersionedWithCount NgramsTable)
731 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
732 ns <- selectNodesWithUsername NodeList userMaster
733 let ngramsType = ngramsTypeFromTabType tabType
734 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
735 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
736 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
740 apiNgramsTableCorpus :: ( GargServerC env err m
742 => NodeId -> ServerT TableNgramsApi m
743 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
745 :<|> scoresRecomputeTableNgrams cId
746 :<|> getTableNgramsVersion cId
747 :<|> apiNgramsAsync cId
749 apiNgramsTableDoc :: ( GargServerC env err m
751 => DocId -> ServerT TableNgramsApi m
752 apiNgramsTableDoc dId = getTableNgramsDoc dId
754 :<|> scoresRecomputeTableNgrams dId
755 :<|> getTableNgramsVersion dId
756 :<|> apiNgramsAsync dId
757 -- > index all the corpus accordingly (TODO AD)
759 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
760 apiNgramsAsync _dId =
762 JobFunction $ \i log ->
765 printDebug "tableNgramsPostChartsAsync" x
767 in tableNgramsPostChartsAsync i log'
769 -- Did the given list of ngrams changed since the given version?
770 -- The returned value is versioned boolean value, meaning that one always retrieve the
772 -- If the given version is negative then one simply receive the latest version and True.
773 -- Using this function is more precise than simply comparing the latest version number
774 -- with the local version number. Indeed there might be no change to this particular list
775 -- and still the version number has changed because of other lists.
777 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
778 -- * currentVersion: good computation, good bandwidth, bad precision.
779 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
780 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
781 listNgramsChangedSince :: RepoCmdM env err m
782 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
783 listNgramsChangedSince listId ngramsType version
785 Versioned <$> currentVersion <*> pure True
787 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)