2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
33 --, rmListNgrams TODO fix before exporting
34 , apiNgramsTableCorpus
55 , NgramsRepoElement(..)
72 , VersionedWithCount(..)
74 , listNgramsChangedSince
75 , MinSize, MaxSize, OrderBy, NgramsTable
76 , UpdateTableNgramsCharts
80 import Control.Concurrent
81 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
82 import Control.Monad.Reader
83 import Data.Aeson hiding ((.=))
84 import Data.Either (Either(..))
86 import Data.Map.Strict (Map)
87 import Data.Maybe (fromMaybe)
89 import Data.Ord (Down(..))
90 import Data.Patch.Class (Action(act), Transformable(..), ours)
91 import Data.Swagger hiding (version, patch)
92 import Data.Text (Text, isInfixOf, unpack, pack)
93 import Data.Text.Lazy.IO as DTL
94 import Formatting (hprint, int, (%))
95 import GHC.Generics (Generic)
96 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
97 import Gargantext.API.Admin.Types (HasSettings)
98 import Gargantext.API.Job
99 import Gargantext.API.Ngrams.Types
100 import Gargantext.API.Prelude
101 import Gargantext.Core.NodeStory
102 import Gargantext.Core.Mail.Types (HasMail)
103 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
104 import Gargantext.API.Ngrams.Tools
105 import Gargantext.Database.Action.Flow.Types
106 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
107 import Gargantext.Database.Admin.Config (userMaster)
108 import Gargantext.Database.Admin.Types.Node (NodeType(..))
109 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
110 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
111 import Gargantext.Database.Query.Table.Node (getNode)
112 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
113 import Gargantext.Database.Query.Table.Node.Select
114 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
115 import Gargantext.Prelude hiding (log)
116 import Gargantext.Prelude.Clock (hasTime, getTime)
117 import Prelude (error)
118 import Servant hiding (Patch)
119 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
120 import System.IO (stderr)
121 import Test.QuickCheck (elements)
122 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
123 import qualified Data.Aeson.Text as DAT
124 import qualified Data.List as List
125 import qualified Data.Map.Strict as Map
126 import qualified Data.Map.Strict.Patch as PM
127 import qualified Data.Set as S
128 import qualified Data.Set as Set
129 import qualified Gargantext.API.Metrics as Metrics
130 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
133 -- TODO sequences of modifications (Patchs)
134 type NgramsIdPatch = Patch NgramsId NgramsPatch
136 ngramsPatch :: Int -> NgramsPatch
137 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
139 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
140 toEdit n p = Edit n p
141 ngramsIdPatch :: Patch NgramsId NgramsPatch
142 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
143 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
144 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
147 -- applyPatchBack :: Patch -> IO Patch
148 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
150 ------------------------------------------------------------------------
151 ------------------------------------------------------------------------
152 ------------------------------------------------------------------------
155 -- TODO: Replace.old is ignored which means that if the current list
156 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
157 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
158 -- However this should not happen in non conflicting situations.
159 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
160 mkListsUpdate nt patches =
161 [ (ngramsTypeId nt, ng, listTypeId lt)
162 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
163 , lt <- patch ^.. patch_list . new
166 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
169 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
170 mkChildrenGroups addOrRem nt patches =
171 [ (ngramsTypeId nt, parent, child)
172 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
173 , child <- patch ^.. patch_children . to addOrRem . folded
177 ------------------------------------------------------------------------
179 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
181 saveNodeStory = liftBase =<< view hasNodeStorySaver
184 listTypeConflictResolution :: ListType -> ListType -> ListType
185 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
188 ngramsStatePatchConflictResolution
189 :: TableNgrams.NgramsType
191 -> ConflictResolutionNgramsPatch
192 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
193 = (ours, (const ours, ours), (False, False))
194 -- (False, False) mean here that Mod has always priority.
195 -- (True, False) <- would mean priority to the left (same as ours).
196 -- undefined {- TODO think this through -}, listTypeConflictResolution)
202 -- Insertions are not considered as patches,
203 -- they do not extend history,
204 -- they do not bump version.
205 insertNewOnly :: a -> Maybe b -> a
206 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
207 -- TODO error handling
210 -- TODO refactor with putListNgrams
211 copyListNgrams :: RepoCmdM env err m
212 => NodeId -> NodeId -> NgramsType
214 copyListNgrams srcListId dstListId ngramsType = do
216 liftBase $ modifyMVar_ var $
217 pure . (r_state . at ngramsType %~ (Just . f . something))
220 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
221 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
223 -- TODO refactor with putListNgrams
224 -- The list must be non-empty!
225 -- The added ngrams must be non-existent!
226 addListNgrams :: RepoCmdM env err m
227 => NodeId -> NgramsType
228 -> [NgramsElement] -> m ()
229 addListNgrams listId ngramsType nes = do
231 liftBase $ modifyMVar_ var $
232 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
235 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
238 -- | TODO: incr the Version number
239 -- && should use patch
242 setListNgrams :: HasNodeStory env err m
244 -> TableNgrams.NgramsType
245 -> Map NgramsTerm NgramsRepoElement
247 setListNgrams listId ngramsType ns = do
248 -- printDebug "[setListNgrams]" (listId, ngramsType)
249 getter <- view hasNodeStory
250 var <- liftBase $ (getter ^. nse_getter) [listId]
251 liftBase $ modifyMVar_ var $
261 currentVersion :: HasNodeStory env err m
262 => ListId -> m Version
263 currentVersion listId = do
264 nls <- getRepo [listId]
265 pure $ nls ^. unNodeStory . at listId . _Just . a_version
268 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
269 newNgramsFromNgramsStatePatch p =
270 [ text2ngrams (unNgramsTerm n)
271 | (n,np) <- p ^.. _PatchMap
272 -- . each . _PatchMap
273 . each . _NgramsTablePatch
274 . _PatchMap . ifolded . withIndex
275 , _ <- np ^.. patch_new . _Just
281 commitStatePatch :: (HasNodeStory env err m, HasMail env)
283 -> Versioned NgramsStatePatch'
284 -> m (Versioned NgramsStatePatch')
285 commitStatePatch listId (Versioned p_version p) = do
286 -- printDebug "[commitStatePatch]" listId
287 var <- getNodeStoryVar [listId]
288 vq' <- liftBase $ modifyMVar var $ \ns -> do
290 a = ns ^. unNodeStory . at listId . _Just
291 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
292 (p', q') = transformWith ngramsStatePatchConflictResolution p q
293 a' = a & a_version +~ 1
295 & a_history %~ (p' :)
298 -- Ideally we would like to check these properties. However:
299 -- * They should be checked only to debug the code. The client data
300 -- should be able to trigger these.
301 -- * What kind of error should they throw (we are in IO here)?
302 -- * Should we keep modifyMVar?
303 -- * Should we throw the validation in an Exception, catch it around
304 -- modifyMVar and throw it back as an Error?
305 assertValid $ transformable p q
306 assertValid $ applicable p' (r ^. r_state)
308 printDebug "[commitStatePatch] a version" (a ^. a_version)
309 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
310 pure ( ns & unNodeStory . at listId .~ (Just a')
311 , Versioned (a' ^. a_version) q'
315 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
321 -- This is a special case of tableNgramsPut where the input patch is empty.
322 tableNgramsPull :: HasNodeStory env err m
324 -> TableNgrams.NgramsType
326 -> m (Versioned NgramsTablePatch)
327 tableNgramsPull listId ngramsType p_version = do
328 printDebug "[tableNgramsPull]" (listId, ngramsType)
329 var <- getNodeStoryVar [listId]
330 r <- liftBase $ readMVar var
333 a = r ^. unNodeStory . at listId . _Just
334 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
335 q_table = q ^. _PatchMap . at ngramsType . _Just
337 pure (Versioned (a ^. a_version) q_table)
342 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
343 -- Apply the given patch to the DB and returns the patch to be applied on the
346 tableNgramsPut :: ( HasNodeStory env err m
347 , HasInvalidError err
353 -> Versioned NgramsTablePatch
354 -> m (Versioned NgramsTablePatch)
355 tableNgramsPut tabType listId (Versioned p_version p_table)
356 | p_table == mempty = do
357 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
358 let ngramsType = ngramsTypeFromTabType tabType
359 tableNgramsPull listId ngramsType p_version
362 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
363 let ngramsType = ngramsTypeFromTabType tabType
364 (p, p_validity) = PM.singleton ngramsType p_table
366 assertValid p_validity
368 ret <- commitStatePatch listId (Versioned p_version p)
369 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
375 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
380 => UpdateTableNgramsCharts
383 tableNgramsPostChartsAsync utn logStatus = do
384 let tabType = utn ^. utn_tab_type
385 let listId = utn ^. utn_list_id
387 node <- getNode listId
388 let nId = node ^. node_id
389 _uId = node ^. node_user_id
390 mCId = node ^. node_parent_id
392 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
393 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
397 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
398 pure $ jobLogFail $ jobLogInit 1
402 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
403 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
405 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
410 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
411 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
412 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
414 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
415 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
417 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
418 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
420 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
425 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
426 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
428 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
433 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
434 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
437 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
439 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
441 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
443 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
445 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
447 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
453 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
454 pure $ jobLogFail $ jobLogInit 1
457 { _ne_list :: ListType
458 If we merge the parents/children we can potentially create cycles!
459 , _ne_parent :: Maybe NgramsTerm
460 , _ne_children :: MSet NgramsTerm
464 getNgramsTableMap :: HasNodeStory env err m
466 -> TableNgrams.NgramsType
467 -> m (Versioned NgramsTableMap)
468 getNgramsTableMap nodeId ngramsType = do
469 v <- getNodeStoryVar [nodeId]
470 repo <- liftBase $ readMVar v
471 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
472 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
475 dumpJsonTableMap :: HasNodeStory env err m
478 -> TableNgrams.NgramsType
480 dumpJsonTableMap fpath nodeId ngramsType = do
481 m <- getNgramsTableMap nodeId ngramsType
482 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
489 -- | TODO Errors management
490 -- TODO: polymorphic for Annuaire or Corpus or ...
491 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
492 -- TODO: should take only one ListId
495 getTableNgrams :: forall env err m.
496 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
497 => NodeType -> NodeId -> TabType
498 -> ListId -> Limit -> Maybe Offset
500 -> Maybe MinSize -> Maybe MaxSize
502 -> (NgramsTerm -> Bool)
503 -> m (VersionedWithCount NgramsTable)
504 getTableNgrams _nType nId tabType listId limit_ offset
505 listType minSize maxSize orderBy searchQuery = do
508 -- lIds <- selectNodesWithUsername NodeList userMaster
510 ngramsType = ngramsTypeFromTabType tabType
511 offset' = maybe 0 identity offset
512 listType' = maybe (const True) (==) listType
513 minSize' = maybe (const True) (<=) minSize
514 maxSize' = maybe (const True) (>=) maxSize
516 selected_node n = minSize' s
518 && searchQuery (n ^. ne_ngrams)
519 && listType' (n ^. ne_list)
523 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
525 ---------------------------------------
526 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
527 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
528 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
529 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
530 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
532 ---------------------------------------
533 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
534 filteredNodes tableMap = rootOf <$> list & filter selected_node
536 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
540 list = tableMap ^.. each
542 ---------------------------------------
543 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
544 selectAndPaginate tableMap = roots <> inners
546 list = tableMap ^.. each
547 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
551 selected_nodes = list & take limit_
553 . filter selected_node
554 . sortOnOrder orderBy
555 roots = rootOf <$> selected_nodes
556 rootsSet = Set.fromList (_ne_ngrams <$> roots)
557 inners = list & filter (selected_inner rootsSet)
559 ---------------------------------------
560 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
561 setScores False table = pure table
562 setScores True table = do
563 let ngrams_terms = table ^.. each . ne_ngrams
564 -- printDebug "ngrams_terms" ngrams_terms
566 occurrences <- getOccByNgramsOnlyFast' nId
570 --printDebug "occurrences" occurrences
572 liftBase $ hprint stderr
573 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
574 (length ngrams_terms) t1 t2
576 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
578 pure $ table & each %~ setOcc
579 ---------------------------------------
581 -- lists <- catMaybes <$> listsWith userMaster
582 -- trace (show lists) $
583 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
586 let scoresNeeded = needsScores orderBy
587 tableMap1 <- getNgramsTableMap listId ngramsType
590 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
591 . Map.mapWithKey ngramsElementFromRepo
593 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
596 let fltrCount = length $ fltr ^. v_data . _NgramsTable
599 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
600 . setScores (not scoresNeeded)
603 liftBase $ hprint stderr
604 ("getTableNgrams total=" % hasTime
608 % " sql=" % (if scoresNeeded then "map2" else "map3")
610 ) t0 t3 t0 t1 t1 t2 t2 t3
611 pure $ toVersionedWithCount fltrCount tableMap3
615 scoresRecomputeTableNgrams :: forall env err m.
616 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
617 => NodeId -> TabType -> ListId -> m Int
618 scoresRecomputeTableNgrams nId tabType listId = do
619 tableMap <- getNgramsTableMap listId ngramsType
620 _ <- tableMap & v_data %%~ setScores
621 . Map.mapWithKey ngramsElementFromRepo
625 ngramsType = ngramsTypeFromTabType tabType
627 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
629 let ngrams_terms = table ^.. each . ne_ngrams
630 occurrences <- getOccByNgramsOnlyFast' nId
635 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
637 pure $ table & each %~ setOcc
644 -- TODO: find a better place for the code above, All APIs stay here
646 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
647 deriving (Generic, Enum, Bounded, Read, Show)
649 instance FromHttpApiData OrderBy
651 parseUrlPiece "TermAsc" = pure TermAsc
652 parseUrlPiece "TermDesc" = pure TermDesc
653 parseUrlPiece "ScoreAsc" = pure ScoreAsc
654 parseUrlPiece "ScoreDesc" = pure ScoreDesc
655 parseUrlPiece _ = Left "Unexpected value of OrderBy"
657 instance ToHttpApiData OrderBy where
658 toUrlPiece = pack . show
660 instance ToParamSchema OrderBy
661 instance FromJSON OrderBy
662 instance ToJSON OrderBy
663 instance ToSchema OrderBy
664 instance Arbitrary OrderBy
666 arbitrary = elements [minBound..maxBound]
668 needsScores :: Maybe OrderBy -> Bool
669 needsScores (Just ScoreAsc) = True
670 needsScores (Just ScoreDesc) = True
671 needsScores _ = False
673 type TableNgramsApiGet = Summary " Table Ngrams API Get"
674 :> QueryParamR "ngramsType" TabType
675 :> QueryParamR "list" ListId
676 :> QueryParamR "limit" Limit
677 :> QueryParam "offset" Offset
678 :> QueryParam "listType" ListType
679 :> QueryParam "minTermSize" MinSize
680 :> QueryParam "maxTermSize" MaxSize
681 :> QueryParam "orderBy" OrderBy
682 :> QueryParam "search" Text
683 :> Get '[JSON] (VersionedWithCount NgramsTable)
685 type TableNgramsApiPut = Summary " Table Ngrams API Change"
686 :> QueryParamR "ngramsType" TabType
687 :> QueryParamR "list" ListId
688 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
689 :> Put '[JSON] (Versioned NgramsTablePatch)
691 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
692 :> QueryParamR "ngramsType" TabType
693 :> QueryParamR "list" ListId
694 :> "recompute" :> Post '[JSON] Int
696 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
697 :> QueryParamR "ngramsType" TabType
698 :> QueryParamR "list" ListId
699 :> Get '[JSON] Version
701 type TableNgramsApi = TableNgramsApiGet
702 :<|> TableNgramsApiPut
703 :<|> RecomputeScoresNgramsApiGet
704 :<|> "version" :> TableNgramsApiGetVersion
705 :<|> TableNgramsAsyncApi
707 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
711 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
713 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
720 -> Maybe MinSize -> Maybe MaxSize
722 -> Maybe Text -- full text search
723 -> m (VersionedWithCount NgramsTable)
724 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
725 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
727 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
731 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
736 getTableNgramsVersion _nId _tabType listId = currentVersion listId
741 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
742 -- This line above looks like a waste of computation to finally get only the version.
743 -- See the comment about listNgramsChangedSince.
746 -- | Text search is deactivated for now for ngrams by doc only
747 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
749 -> ListId -> Limit -> Maybe Offset
751 -> Maybe MinSize -> Maybe MaxSize
753 -> Maybe Text -- full text search
754 -> m (VersionedWithCount NgramsTable)
755 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
756 ns <- selectNodesWithUsername NodeList userMaster
757 let ngramsType = ngramsTypeFromTabType tabType
758 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
759 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
760 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
764 apiNgramsTableCorpus :: ( GargServerC env err m
766 => NodeId -> ServerT TableNgramsApi m
767 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
769 :<|> scoresRecomputeTableNgrams cId
770 :<|> getTableNgramsVersion cId
771 :<|> apiNgramsAsync cId
773 apiNgramsTableDoc :: ( GargServerC env err m
775 => DocId -> ServerT TableNgramsApi m
776 apiNgramsTableDoc dId = getTableNgramsDoc dId
778 :<|> scoresRecomputeTableNgrams dId
779 :<|> getTableNgramsVersion dId
780 :<|> apiNgramsAsync dId
782 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
783 apiNgramsAsync _dId =
785 JobFunction $ \i log ->
788 printDebug "tableNgramsPostChartsAsync" x
790 in tableNgramsPostChartsAsync i log'
792 -- Did the given list of ngrams changed since the given version?
793 -- The returned value is versioned boolean value, meaning that one always retrieve the
795 -- If the given version is negative then one simply receive the latest version and True.
796 -- Using this function is more precise than simply comparing the latest version number
797 -- with the local version number. Indeed there might be no change to this particular list
798 -- and still the version number has changed because of other lists.
800 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
801 -- * currentVersion: good computation, good bandwidth, bad precision.
802 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
803 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
804 listNgramsChangedSince :: HasNodeStory env err m
805 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
806 listNgramsChangedSince listId ngramsType version
808 Versioned <$> currentVersion listId <*> pure True
810 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)