2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 module Gargantext.API.Ngrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
56 , NgramsRepoElement(..)
82 , listNgramsChangedSince
86 import Control.Concurrent
87 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
88 import Control.Monad.Reader
89 import Data.Aeson hiding ((.=))
90 import qualified Data.Aeson.Text as DAT
91 import Data.Either (Either(..))
93 import qualified Data.List as List
94 import Data.Map.Strict (Map)
95 import qualified Data.Map.Strict as Map
96 import qualified Data.Map.Strict.Patch as PM
97 import Data.Maybe (fromMaybe)
99 import Data.Ord (Down(..))
100 import Data.Patch.Class (Action(act), Transformable(..), ours)
101 import qualified Data.Set as S
102 import qualified Data.Set as Set
103 import Data.Swagger hiding (version, patch)
104 import Data.Text (Text, isInfixOf, unpack)
105 import Data.Text.Lazy.IO as DTL
106 import Formatting (hprint, int, (%))
107 import Formatting.Clock (timeSpecs)
108 import GHC.Generics (Generic)
109 import Servant hiding (Patch)
110 import System.Clock (getTime, TimeSpec, Clock(..))
111 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
112 import System.IO (stderr)
113 import Test.QuickCheck (elements)
114 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
116 import Prelude (error)
117 import Gargantext.Prelude hiding (log)
119 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
120 import Gargantext.API.Admin.Types (HasSettings)
121 import qualified Gargantext.API.Metrics as Metrics
122 import Gargantext.API.Ngrams.Types
123 import Gargantext.API.Prelude
124 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
125 import Gargantext.Core.Utils (something)
126 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
127 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
128 import Gargantext.Database.Action.Flow.Types
129 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
130 import Gargantext.Database.Admin.Config (userMaster)
131 import Gargantext.Database.Admin.Types.Node (NodeType(..))
132 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
133 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
134 import Gargantext.Database.Query.Table.Node.Select
135 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
136 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
137 import Gargantext.Database.Query.Table.Node (getNode)
138 import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
139 import Gargantext.Prelude.Job
142 -- TODO sequences of modifications (Patchs)
143 type NgramsIdPatch = Patch NgramsId NgramsPatch
145 ngramsPatch :: Int -> NgramsPatch
146 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
148 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
149 toEdit n p = Edit n p
150 ngramsIdPatch :: Patch NgramsId NgramsPatch
151 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
152 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
153 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
156 -- applyPatchBack :: Patch -> IO Patch
157 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
159 ------------------------------------------------------------------------
160 ------------------------------------------------------------------------
161 ------------------------------------------------------------------------
-- TODO: Replace.old is ignored, which means that if the current list is
-- `MapTerm` and the patch is `Replace CandidateTerm StopTerm`, then
-- the list is going to be `StopTerm` while it should keep `MapTerm`.
-- However, this should not happen in non-conflicting situations.
168 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
169 mkListsUpdate nt patches =
170 [ (ngramsTypeId nt, ng, listTypeId lt)
171 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
172 , lt <- patch ^.. patch_list . new
175 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
178 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
179 mkChildrenGroups addOrRem nt patches =
180 [ (ngramsTypeId nt, parent, child)
181 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
182 , child <- patch ^.. patch_children . to addOrRem . folded
186 ------------------------------------------------------------------------
188 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
190 saveRepo = liftBase =<< view repoSaver
-- | Decide which 'ListType' wins when two concurrent edits disagree.
--
-- Not implemented yet: both arguments are ignored and forcing the result
-- raises an error with an explicit message (instead of an anonymous
-- @undefined@), so that an accidental use is easy to locate.
listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ =
  error "listTypeConflictResolution: not implemented" -- TODO Use Map User ListType
195 ngramsStatePatchConflictResolution
196 :: TableNgrams.NgramsType
199 -> ConflictResolutionNgramsPatch
200 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
201 = (ours, (const ours, ours), (False, False))
  -- (False, False) means here that Mod always has priority.
203 -- (True, False) <- would mean priority to the left (same as ours).
205 -- undefined {- TODO think this through -}, listTypeConflictResolution)
208 -- Insertions are not considered as patches,
209 -- they do not extend history,
210 -- they do not bump version.
-- | Accept a value only for a slot that is still empty.
--
-- The first argument is the value to store, the second one the current
-- content of the slot. When the slot is empty ('Nothing') the value to
-- store is returned; an already occupied slot is considered impossible
-- and aborts with 'error'.
insertNewOnly :: a -> Maybe b -> a
insertNewOnly fresh existing =
  case existing of
    Nothing -> fresh
    Just _  -> error "insertNewOnly: impossible"
213 -- TODO error handling
216 -- TODO refactor with putListNgrams
217 copyListNgrams :: RepoCmdM env err m
218 => NodeId -> NodeId -> NgramsType
220 copyListNgrams srcListId dstListId ngramsType = do
222 liftBase $ modifyMVar_ var $
223 pure . (r_state . at ngramsType %~ (Just . f . something))
226 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
227 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
229 -- TODO refactor with putListNgrams
230 -- The list must be non-empty!
231 -- The added ngrams must be non-existent!
232 addListNgrams :: RepoCmdM env err m
233 => NodeId -> NgramsType
234 -> [NgramsElement] -> m ()
235 addListNgrams listId ngramsType nes = do
237 liftBase $ modifyMVar_ var $
238 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
241 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
245 rmListNgrams :: RepoCmdM env err m
247 -> TableNgrams.NgramsType
249 rmListNgrams l nt = setListNgrams l nt mempty
251 -- | TODO: incr the Version number
252 -- && should use patch
254 setListNgrams :: RepoCmdM env err m
256 -> TableNgrams.NgramsType
257 -> Map NgramsTerm NgramsRepoElement
259 setListNgrams listId ngramsType ns = do
261 liftBase $ modifyMVar_ var $
265 (at listId .~ ( Just ns))
272 currentVersion :: RepoCmdM env err m
276 r <- liftBase $ readMVar var
277 pure $ r ^. r_version
279 newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
280 newNgramsFromNgramsStatePatch p =
281 [ text2ngrams (unNgramsTerm n)
282 | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
283 , _ <- np ^.. patch_new . _Just
286 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
287 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
288 commitStatePatch (Versioned p_version p) = do
290 vq' <- liftBase $ modifyMVar var $ \r -> do
292 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
293 (p', q') = transformWith ngramsStatePatchConflictResolution p q
294 r' = r & r_version +~ 1
296 & r_history %~ (p' :)
298 -- Ideally we would like to check these properties. However:
299 -- * They should be checked only to debug the code. The client data
300 -- should be able to trigger these.
301 -- * What kind of error should they throw (we are in IO here)?
302 -- * Should we keep modifyMVar?
303 -- * Should we throw the validation in an Exception, catch it around
304 -- modifyMVar and throw it back as an Error?
305 assertValid $ transformable p q
306 assertValid $ applicable p' (r ^. r_state)
308 pure (r', Versioned (r' ^. r_version) q')
313 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
317 -- This is a special case of tableNgramsPut where the input patch is empty.
318 tableNgramsPull :: RepoCmdM env err m
320 -> TableNgrams.NgramsType
322 -> m (Versioned NgramsTablePatch)
323 tableNgramsPull listId ngramsType p_version = do
325 r <- liftBase $ readMVar var
328 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
329 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
331 pure (Versioned (r ^. r_version) q_table)
333 -- Apply the given patch to the DB and returns the patch to be applied on the
336 tableNgramsPut :: ( FlowCmdM env err m
341 -> Versioned NgramsTablePatch
342 -> m (Versioned NgramsTablePatch)
343 tableNgramsPut tabType listId (Versioned p_version p_table)
344 | p_table == mempty = do
345 let ngramsType = ngramsTypeFromTabType tabType
346 tableNgramsPull listId ngramsType p_version
349 let ngramsType = ngramsTypeFromTabType tabType
350 (p0, p0_validity) = PM.singleton listId p_table
351 (p, p_validity) = PM.singleton ngramsType p0
353 assertValid p0_validity
354 assertValid p_validity
356 ret <- commitStatePatch (Versioned p_version p)
357 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
362 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
366 => UpdateTableNgramsCharts
369 tableNgramsPostChartsAsync utn logStatus = do
370 let tabType = utn ^. utn_tab_type
371 let listId = utn ^. utn_list_id
373 node <- getNode listId
374 let nId = node ^. node_id
375 _uId = node ^. node_userId
376 mCId = node ^. node_parentId
378 printDebug "[tableNgramsPut] tabType" tabType
379 printDebug "[tableNgramsPut] listId" listId
383 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
384 pure $ jobLogFail $ jobLogInit 1
388 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
389 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
391 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
396 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
397 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
398 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
400 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
401 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
403 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
404 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
406 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
411 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
412 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
414 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
419 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
420 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
423 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
425 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
427 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
429 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
431 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
433 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
439 printDebug "[tableNgramsPut] no update for tabType = " tabType
440 pure $ jobLogFail $ jobLogInit 1
443 { _ne_list :: ListType
444 If we merge the parents/children we can potentially create cycles!
445 , _ne_parent :: Maybe NgramsTerm
446 , _ne_children :: MSet NgramsTerm
450 getNgramsTableMap :: RepoCmdM env err m
452 -> TableNgrams.NgramsType
453 -> m (Versioned NgramsTableMap)
454 getNgramsTableMap nodeId ngramsType = do
456 repo <- liftBase $ readMVar v
457 pure $ Versioned (repo ^. r_version)
458 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
460 dumpJsonTableMap :: RepoCmdM env err m
463 -> TableNgrams.NgramsType
465 dumpJsonTableMap fpath nodeId ngramsType = do
466 m <- getNgramsTableMap nodeId ngramsType
467 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
473 -- | TODO Errors management
474 -- TODO: polymorphic for Annuaire or Corpus or ...
475 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
476 -- TODO: should take only one ListId
-- | Sample the 'ProcessCPUTime' clock from any monad able to lift 'IO'.
--
-- Note that this clock is the CPU time of the process, not wall-clock time.
getTime' :: MonadBase IO m => m TimeSpec
getTime' = liftBase (getTime ProcessCPUTime)
482 getTableNgrams :: forall env err m.
483 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
484 => NodeType -> NodeId -> TabType
485 -> ListId -> Limit -> Maybe Offset
487 -> Maybe MinSize -> Maybe MaxSize
489 -> (NgramsTerm -> Bool)
490 -> m (Versioned NgramsTable)
491 getTableNgrams _nType nId tabType listId limit_ offset
492 listType minSize maxSize orderBy searchQuery = do
495 -- lIds <- selectNodesWithUsername NodeList userMaster
497 ngramsType = ngramsTypeFromTabType tabType
498 offset' = maybe 0 identity offset
499 listType' = maybe (const True) (==) listType
500 minSize' = maybe (const True) (<=) minSize
501 maxSize' = maybe (const True) (>=) maxSize
503 selected_node n = minSize' s
505 && searchQuery (n ^. ne_ngrams)
506 && listType' (n ^. ne_list)
510 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
512 ---------------------------------------
513 sortOnOrder Nothing = identity
514 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
515 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
516 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
517 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
519 ---------------------------------------
521 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
522 filteredNodes tableMap = rootOf <$> list & filter selected_node
524 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
526 list = tableMap ^.. each
528 ---------------------------------------
529 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
530 selectAndPaginate tableMap = roots <> inners
532 list = tableMap ^.. each
533 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
535 selected_nodes = list & take limit_
537 . filter selected_node
538 . sortOnOrder orderBy
539 roots = rootOf <$> selected_nodes
540 rootsSet = Set.fromList (_ne_ngrams <$> roots)
541 inners = list & filter (selected_inner rootsSet)
543 ---------------------------------------
544 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
545 setScores False table = pure table
546 setScores True table = do
547 let ngrams_terms = table ^.. each . ne_ngrams
549 occurrences <- getOccByNgramsOnlyFast' nId
554 liftBase $ hprint stderr
555 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
556 (length ngrams_terms) t1 t2
558 occurrences <- getOccByNgramsOnlySlow nType nId
564 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
566 pure $ table & each %~ setOcc
567 ---------------------------------------
569 -- lists <- catMaybes <$> listsWith userMaster
570 -- trace (show lists) $
571 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
574 let scoresNeeded = needsScores orderBy
575 tableMap1 <- getNgramsTableMap listId ngramsType
577 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
578 . Map.mapWithKey ngramsElementFromRepo
580 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
583 printDebug "[getTableNgrams] fltr" $ length $ fltr ^. v_data . _NgramsTable
586 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
587 . setScores (not scoresNeeded)
590 liftBase $ hprint stderr
591 ("getTableNgrams total=" % timeSpecs
592 % " map1=" % timeSpecs
593 % " map2=" % timeSpecs
594 % " map3=" % timeSpecs
595 % " sql=" % (if scoresNeeded then "map2" else "map3")
597 ) t0 t3 t0 t1 t1 t2 t2 t3
601 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
602 scoresRecomputeTableNgrams nId tabType listId = do
603 tableMap <- getNgramsTableMap listId ngramsType
604 _ <- tableMap & v_data %%~ setScores
605 . Map.mapWithKey ngramsElementFromRepo
609 ngramsType = ngramsTypeFromTabType tabType
611 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
613 let ngrams_terms = table ^.. each . ne_ngrams
614 occurrences <- getOccByNgramsOnlyFast' nId
619 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
621 pure $ table & each %~ setOcc
627 -- TODO: find a better place for the code above, All APIs stay here
-- | Sort criterion for an ngrams table: by term or by score (occurrences),
-- each in ascending or descending order.
--
-- The constructor order is significant: it drives the derived 'Enum' and
-- 'Bounded' instances.
data OrderBy
  = TermAsc
  | TermDesc
  | ScoreAsc
  | ScoreDesc
  deriving (Generic, Enum, Bounded, Read, Show)
-- | Decode an 'OrderBy' from a URL piece.
--
-- Only the four exact constructor names are accepted; any other input is
-- rejected with a 'Left' error message.
instance FromHttpApiData OrderBy
  where
    parseUrlPiece "TermAsc"   = pure TermAsc
    parseUrlPiece "TermDesc"  = pure TermDesc
    parseUrlPiece "ScoreAsc"  = pure ScoreAsc
    parseUrlPiece "ScoreDesc" = pure ScoreDesc
    parseUrlPiece _           = Left "Unexpected value of OrderBy"
-- Schema and JSON instances: the bodies are empty, so each one relies on
-- the default method implementations of its class.
instance ToParamSchema OrderBy
instance FromJSON OrderBy
instance ToJSON OrderBy
instance ToSchema OrderBy

-- | Generates any of the constructors, enumerated through 'Bounded'/'Enum'.
instance Arbitrary OrderBy
  where
    arbitrary = elements [minBound..maxBound]
-- | Tell whether the requested ordering depends on the scores.
--
-- 'True' for 'ScoreAsc' and 'ScoreDesc'; 'False' for the term-based
-- orderings and when no ordering is requested.
needsScores :: Maybe OrderBy -> Bool
needsScores requested =
  case requested of
    Just ScoreAsc  -> True
    Just ScoreDesc -> True
    _              -> False
-- | Route returning a versioned ngrams table as JSON.
--
-- Query parameters, in order: @ngramsType@, @list@, @limit@ (declared with
-- 'QueryParamR'), then @offset@, @listType@, @minTermSize@, @maxTermSize@,
-- @orderBy@ and @search@ (declared with 'QueryParam', hence optional).
type TableNgramsApiGet =
     Summary " Table Ngrams API Get"
  :> QueryParamR "ngramsType"  TabType
  :> QueryParamR "list"        ListId
  :> QueryParamR "limit"       Limit
  :> QueryParam  "offset"      Offset
  :> QueryParam  "listType"    ListType
  :> QueryParam  "minTermSize" MinSize
  :> QueryParam  "maxTermSize" MaxSize
  :> QueryParam  "orderBy"     OrderBy
  :> QueryParam  "search"      Text
  :> Get '[JSON] (Versioned NgramsTable)
-- | Route submitting a versioned patch for one ngrams table.
--
-- The request body and the response are both a JSON-encoded
-- @Versioned NgramsTablePatch@; the table is selected by the
-- @ngramsType@ and @list@ query parameters.
type TableNgramsApiPut =
     Summary " Table Ngrams API Change"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> ReqBody '[JSON] (Versioned NgramsTablePatch)
  :> Put     '[JSON] (Versioned NgramsTablePatch)
-- | Route asking for the scores of one ngrams table to be recomputed.
--
-- Despite the @Get@ suffix of the type name, this is a @POST@ on the
-- @recompute@ path segment; it answers with an 'Int' encoded as JSON.
type RecomputeScoresNgramsApiGet =
     Summary " Recompute scores for ngrams table"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> "recompute"
  :> Post '[JSON] Int
-- | Route returning only a 'Version' as JSON, selected by the
-- @ngramsType@ and @list@ query parameters.
type TableNgramsApiGetVersion =
     Summary " Table Ngrams API Get Version"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> Get '[JSON] Version
-- | The complete ngrams table API: the alternatives below, in this order.
--
-- Only the version route is nested under an extra path segment
-- (@version@); a server for this type must supply its handlers in the
-- same order as the alternatives.
type TableNgramsApi =
       TableNgramsApiGet
  :<|> TableNgramsApiPut
  :<|> RecomputeScoresNgramsApiGet
  :<|> "version" :> TableNgramsApiGetVersion
  :<|> TableNgramsAsyncApi
688 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
692 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
694 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
701 -> Maybe MinSize -> Maybe MaxSize
703 -> Maybe Text -- full text search
704 -> m (Versioned NgramsTable)
705 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
706 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
708 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
710 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
715 getTableNgramsVersion _nId _tabType _listId = currentVersion
717 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
718 -- This line above looks like a waste of computation to finally get only the version.
719 -- See the comment about listNgramsChangedSince.
-- | Ngrams table restricted to the ngrams selected for one document.
--
-- Text search is deactivated for now for ngrams by doc only: the
-- full-text argument is ignored. Instead, the filter passed to
-- 'getTableNgrams' keeps a term only when it belongs to the result of
-- 'selectNgramsByDoc', queried for the given document over the
-- 'userMaster' list nodes plus the requested list.
getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
               => DocId -> TabType
               -> ListId -> Limit -> Maybe Offset
               -> Maybe ListType
               -> Maybe MinSize -> Maybe MaxSize
               -> Maybe OrderBy
               -> Maybe Text -- full text search
               -> m (Versioned NgramsTable)
getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
  ns <- selectNodesWithUsername NodeList userMaster
  let ngramsType = ngramsTypeFromTabType tabType
  ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
  -- Build the lookup set once, outside the predicate, so that it is shared
  -- by every call instead of possibly being rebuilt for each tested term.
  let docTerms = S.fromList ngs
      searchQuery (NgramsTerm nt) = nt `S.member` docTerms
  getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
740 apiNgramsTableCorpus :: ( GargServerC env err m
742 => NodeId -> ServerT TableNgramsApi m
743 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
745 :<|> scoresRecomputeTableNgrams cId
746 :<|> getTableNgramsVersion cId
747 :<|> apiNgramsAsync cId
749 apiNgramsTableDoc :: ( GargServerC env err m
751 => DocId -> ServerT TableNgramsApi m
752 apiNgramsTableDoc dId = getTableNgramsDoc dId
754 :<|> scoresRecomputeTableNgrams dId
755 :<|> getTableNgramsVersion dId
756 :<|> apiNgramsAsync dId
757 -- > index all the corpus accordingly (TODO AD)
759 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
760 apiNgramsAsync _dId =
762 JobFunction $ \i log ->
765 printDebug "tableNgramsPostChartsAsync" x
767 in tableNgramsPostChartsAsync i log'
-- Did the given list of ngrams change since the given version?
-- The returned value is a versioned boolean value, meaning that one always retrieves the
-- latest version.
-- If the given version is negative then one simply receives the latest version and True.
773 -- Using this function is more precise than simply comparing the latest version number
774 -- with the local version number. Indeed there might be no change to this particular list
775 -- and still the version number has changed because of other lists.
777 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
778 -- * currentVersion: good computation, good bandwidth, bad precision.
779 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
780 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
781 listNgramsChangedSince :: RepoCmdM env err m
782 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
783 listNgramsChangedSince listId ngramsType version
785 Versioned <$> currentVersion <*> pure True
787 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)