2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
32 , getTableNgramsCorpus
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
56 , NgramsRepoElement(..)
73 , VersionedWithCount(..)
75 , listNgramsChangedSince
76 , MinSize, MaxSize, OrderBy, NgramsTable
77 , UpdateTableNgramsCharts
81 import Control.Concurrent
82 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
83 import Control.Monad.Reader
84 import Data.Aeson hiding ((.=))
85 import Data.Either (Either(..))
87 import Data.Map.Strict (Map)
88 import Data.Maybe (fromMaybe)
90 import Data.Ord (Down(..))
91 import Data.Patch.Class (Action(act), Transformable(..), ours)
92 import Data.Swagger hiding (version, patch)
93 import Data.Text (Text, isInfixOf, unpack, pack)
94 import Data.Text.Lazy.IO as DTL
95 import Formatting (hprint, int, (%))
96 import GHC.Generics (Generic)
97 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
98 import Gargantext.API.Admin.Types (HasSettings)
99 import Gargantext.API.Job
100 import Gargantext.API.Ngrams.Types
101 import Gargantext.API.Prelude
102 import Gargantext.Core.NodeStory
103 import Gargantext.Core.Mail.Types (HasMail)
104 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
105 import Gargantext.API.Ngrams.Tools
106 import Gargantext.Database.Action.Flow.Types
107 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
108 import Gargantext.Database.Admin.Config (userMaster)
109 import Gargantext.Database.Admin.Types.Node (NodeType(..))
110 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
111 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
112 import Gargantext.Database.Query.Table.Node (getNode)
113 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
114 import Gargantext.Database.Query.Table.Node.Select
115 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
116 import Gargantext.Prelude hiding (log)
117 import Gargantext.Prelude.Clock (hasTime, getTime)
118 import Prelude (error)
119 import Servant hiding (Patch)
120 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
121 import System.IO (stderr)
122 import Test.QuickCheck (elements)
123 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
124 import qualified Data.Aeson.Text as DAT
125 import qualified Data.List as List
126 import qualified Data.Map.Strict as Map
127 import qualified Data.Map.Strict.Patch as PM
128 import qualified Data.Set as S
129 import qualified Data.Set as Set
130 import qualified Gargantext.API.Metrics as Metrics
131 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
134 -- TODO sequences of modifications (Patchs)
135 type NgramsIdPatch = Patch NgramsId NgramsPatch
137 ngramsPatch :: Int -> NgramsPatch
138 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
140 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
141 toEdit n p = Edit n p
142 ngramsIdPatch :: Patch NgramsId NgramsPatch
143 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
144 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
145 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
148 -- applyPatchBack :: Patch -> IO Patch
149 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
151 ------------------------------------------------------------------------
152 ------------------------------------------------------------------------
153 ------------------------------------------------------------------------
156 -- TODO: Replace.old is ignored which means that if the current list
157 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
158 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
159 -- However this should not happen in non conflicting situations.
160 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
161 mkListsUpdate nt patches =
162 [ (ngramsTypeId nt, ng, listTypeId lt)
163 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
164 , lt <- patch ^.. patch_list . new
167 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
170 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
171 mkChildrenGroups addOrRem nt patches =
172 [ (ngramsTypeId nt, parent, child)
173 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
174 , child <- patch ^.. patch_children . to addOrRem . folded
178 ------------------------------------------------------------------------
180 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
182 saveNodeStory = liftBase =<< view hasNodeStorySaver
185 listTypeConflictResolution :: ListType -> ListType -> ListType
186 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
189 ngramsStatePatchConflictResolution
190 :: TableNgrams.NgramsType
192 -> ConflictResolutionNgramsPatch
193 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
194 = (ours, (const ours, ours), (False, False))
195 -- (False, False) mean here that Mod has always priority.
196 -- (True, False) <- would mean priority to the left (same as ours).
197 -- undefined {- TODO think this through -}, listTypeConflictResolution)
203 -- Insertions are not considered as patches,
204 -- they do not extend history,
205 -- they do not bump version.
206 insertNewOnly :: a -> Maybe b -> a
207 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
208 -- TODO error handling
211 -- TODO refactor with putListNgrams
212 copyListNgrams :: RepoCmdM env err m
213 => NodeId -> NodeId -> NgramsType
215 copyListNgrams srcListId dstListId ngramsType = do
217 liftBase $ modifyMVar_ var $
218 pure . (r_state . at ngramsType %~ (Just . f . something))
221 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
222 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
224 -- TODO refactor with putListNgrams
225 -- The list must be non-empty!
226 -- The added ngrams must be non-existent!
227 addListNgrams :: RepoCmdM env err m
228 => NodeId -> NgramsType
229 -> [NgramsElement] -> m ()
230 addListNgrams listId ngramsType nes = do
232 liftBase $ modifyMVar_ var $
233 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
236 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
239 -- | TODO: incr the Version number
240 -- && should use patch
243 setListNgrams :: HasNodeStory env err m
245 -> TableNgrams.NgramsType
246 -> Map NgramsTerm NgramsRepoElement
248 setListNgrams listId ngramsType ns = do
249 -- printDebug "[setListNgrams]" (listId, ngramsType)
250 getter <- view hasNodeStory
251 var <- liftBase $ (getter ^. nse_getter) [listId]
252 liftBase $ modifyMVar_ var $
262 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
263 newNgramsFromNgramsStatePatch p =
264 [ text2ngrams (unNgramsTerm n)
265 | (n,np) <- p ^.. _PatchMap
266 -- . each . _PatchMap
267 . each . _NgramsTablePatch
268 . _PatchMap . ifolded . withIndex
269 , _ <- np ^.. patch_new . _Just
275 commitStatePatch :: (HasNodeStory env err m, HasMail env)
277 -> Versioned NgramsStatePatch'
278 -> m (Versioned NgramsStatePatch')
279 commitStatePatch listId (Versioned _p_version p) = do
280 -- printDebug "[commitStatePatch]" listId
281 var <- getNodeStoryVar [listId]
282 vq' <- liftBase $ modifyMVar var $ \ns -> do
284 a = ns ^. unNodeStory . at listId . _Just
285 -- apply patches from version p_version to a ^. a_version
287 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
288 q = mconcat $ a ^. a_history
289 (p', q') = transformWith ngramsStatePatchConflictResolution p q
290 a' = a & a_version +~ 1
292 & a_history %~ (p' :)
295 -- Ideally we would like to check these properties. However:
296 -- * They should be checked only to debug the code. The client data
297 -- should be able to trigger these.
298 -- * What kind of error should they throw (we are in IO here)?
299 -- * Should we keep modifyMVar?
300 -- * Should we throw the validation in an Exception, catch it around
301 -- modifyMVar and throw it back as an Error?
302 assertValid $ transformable p q
303 assertValid $ applicable p' (r ^. r_state)
305 printDebug "[commitStatePatch] a version" (a ^. a_version)
306 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
307 pure ( ns & unNodeStory . at listId .~ (Just a')
308 , Versioned (a' ^. a_version) q'
312 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
318 -- This is a special case of tableNgramsPut where the input patch is empty.
319 tableNgramsPull :: HasNodeStory env err m
321 -> TableNgrams.NgramsType
323 -> m (Versioned NgramsTablePatch)
324 tableNgramsPull listId ngramsType p_version = do
325 printDebug "[tableNgramsPull]" (listId, ngramsType)
326 var <- getNodeStoryVar [listId]
327 r <- liftBase $ readMVar var
330 a = r ^. unNodeStory . at listId . _Just
331 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
332 q_table = q ^. _PatchMap . at ngramsType . _Just
334 pure (Versioned (a ^. a_version) q_table)
339 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
340 -- Apply the given patch to the DB and returns the patch to be applied on the
343 tableNgramsPut :: ( HasNodeStory env err m
344 , HasInvalidError err
350 -> Versioned NgramsTablePatch
351 -> m (Versioned NgramsTablePatch)
352 tableNgramsPut tabType listId (Versioned p_version p_table)
353 | p_table == mempty = do
354 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
355 let ngramsType = ngramsTypeFromTabType tabType
356 tableNgramsPull listId ngramsType p_version
359 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
360 let ngramsType = ngramsTypeFromTabType tabType
361 (p, p_validity) = PM.singleton ngramsType p_table
363 assertValid p_validity
365 ret <- commitStatePatch listId (Versioned p_version p)
366 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
372 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
377 => UpdateTableNgramsCharts
380 tableNgramsPostChartsAsync utn logStatus = do
381 let tabType = utn ^. utn_tab_type
382 let listId = utn ^. utn_list_id
384 node <- getNode listId
385 let nId = node ^. node_id
386 _uId = node ^. node_user_id
387 mCId = node ^. node_parent_id
389 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
390 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
394 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
395 pure $ jobLogFail $ jobLogInit 1
399 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
400 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
402 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
407 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
408 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
409 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
411 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
412 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
414 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
415 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
417 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
422 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
423 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
425 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
430 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
431 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
434 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
436 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
438 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
440 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
442 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
444 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
450 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
451 pure $ jobLogFail $ jobLogInit 1
454 { _ne_list :: ListType
455 If we merge the parents/children we can potentially create cycles!
456 , _ne_parent :: Maybe NgramsTerm
457 , _ne_children :: MSet NgramsTerm
461 getNgramsTableMap :: HasNodeStory env err m
463 -> TableNgrams.NgramsType
464 -> m (Versioned NgramsTableMap)
465 getNgramsTableMap nodeId ngramsType = do
466 v <- getNodeStoryVar [nodeId]
467 repo <- liftBase $ readMVar v
468 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
469 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
472 dumpJsonTableMap :: HasNodeStory env err m
475 -> TableNgrams.NgramsType
477 dumpJsonTableMap fpath nodeId ngramsType = do
478 m <- getNgramsTableMap nodeId ngramsType
479 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
486 -- | TODO Errors management
487 -- TODO: polymorphic for Annuaire or Corpus or ...
488 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
489 -- TODO: should take only one ListId
492 getTableNgrams :: forall env err m.
493 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
494 => NodeType -> NodeId -> TabType
495 -> ListId -> Limit -> Maybe Offset
497 -> Maybe MinSize -> Maybe MaxSize
499 -> (NgramsTerm -> Bool)
500 -> m (VersionedWithCount NgramsTable)
501 getTableNgrams _nType nId tabType listId limit_ offset
502 listType minSize maxSize orderBy searchQuery = do
505 -- lIds <- selectNodesWithUsername NodeList userMaster
507 ngramsType = ngramsTypeFromTabType tabType
508 offset' = maybe 0 identity offset
509 listType' = maybe (const True) (==) listType
510 minSize' = maybe (const True) (<=) minSize
511 maxSize' = maybe (const True) (>=) maxSize
513 selected_node n = minSize' s
515 && searchQuery (n ^. ne_ngrams)
516 && listType' (n ^. ne_list)
520 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
522 ---------------------------------------
523 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
524 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
525 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
526 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
527 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
529 ---------------------------------------
530 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
531 filteredNodes tableMap = rootOf <$> list & filter selected_node
533 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
537 list = tableMap ^.. each
539 ---------------------------------------
540 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
541 selectAndPaginate tableMap = roots <> inners
543 list = tableMap ^.. each
544 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
548 selected_nodes = list & take limit_
550 . filter selected_node
551 . sortOnOrder orderBy
552 roots = rootOf <$> selected_nodes
553 rootsSet = Set.fromList (_ne_ngrams <$> roots)
554 inners = list & filter (selected_inner rootsSet)
556 ---------------------------------------
557 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
558 setScores False table = pure table
559 setScores True table = do
560 let ngrams_terms = table ^.. each . ne_ngrams
561 -- printDebug "ngrams_terms" ngrams_terms
563 occurrences <- getOccByNgramsOnlyFast' nId
567 --printDebug "occurrences" occurrences
569 liftBase $ hprint stderr
570 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
571 (length ngrams_terms) t1 t2
573 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
575 pure $ table & each %~ setOcc
576 ---------------------------------------
578 -- lists <- catMaybes <$> listsWith userMaster
579 -- trace (show lists) $
580 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
583 let scoresNeeded = needsScores orderBy
584 tableMap1 <- getNgramsTableMap listId ngramsType
587 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
588 . Map.mapWithKey ngramsElementFromRepo
590 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
593 let fltrCount = length $ fltr ^. v_data . _NgramsTable
596 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
597 . setScores (not scoresNeeded)
600 liftBase $ hprint stderr
601 ("getTableNgrams total=" % hasTime
605 % " sql=" % (if scoresNeeded then "map2" else "map3")
607 ) t0 t3 t0 t1 t1 t2 t2 t3
608 pure $ toVersionedWithCount fltrCount tableMap3
612 scoresRecomputeTableNgrams :: forall env err m.
613 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
614 => NodeId -> TabType -> ListId -> m Int
615 scoresRecomputeTableNgrams nId tabType listId = do
616 tableMap <- getNgramsTableMap listId ngramsType
617 _ <- tableMap & v_data %%~ setScores
618 . Map.mapWithKey ngramsElementFromRepo
622 ngramsType = ngramsTypeFromTabType tabType
624 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
626 let ngrams_terms = table ^.. each . ne_ngrams
627 occurrences <- getOccByNgramsOnlyFast' nId
632 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
634 pure $ table & each %~ setOcc
641 -- TODO: find a better place for the code above, All APIs stay here
643 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
644 deriving (Generic, Enum, Bounded, Read, Show)
646 instance FromHttpApiData OrderBy
648 parseUrlPiece "TermAsc" = pure TermAsc
649 parseUrlPiece "TermDesc" = pure TermDesc
650 parseUrlPiece "ScoreAsc" = pure ScoreAsc
651 parseUrlPiece "ScoreDesc" = pure ScoreDesc
652 parseUrlPiece _ = Left "Unexpected value of OrderBy"
654 instance ToHttpApiData OrderBy where
655 toUrlPiece = pack . show
657 instance ToParamSchema OrderBy
658 instance FromJSON OrderBy
659 instance ToJSON OrderBy
660 instance ToSchema OrderBy
661 instance Arbitrary OrderBy
663 arbitrary = elements [minBound..maxBound]
665 needsScores :: Maybe OrderBy -> Bool
666 needsScores (Just ScoreAsc) = True
667 needsScores (Just ScoreDesc) = True
668 needsScores _ = False
670 type TableNgramsApiGet = Summary " Table Ngrams API Get"
671 :> QueryParamR "ngramsType" TabType
672 :> QueryParamR "list" ListId
673 :> QueryParamR "limit" Limit
674 :> QueryParam "offset" Offset
675 :> QueryParam "listType" ListType
676 :> QueryParam "minTermSize" MinSize
677 :> QueryParam "maxTermSize" MaxSize
678 :> QueryParam "orderBy" OrderBy
679 :> QueryParam "search" Text
680 :> Get '[JSON] (VersionedWithCount NgramsTable)
682 type TableNgramsApiPut = Summary " Table Ngrams API Change"
683 :> QueryParamR "ngramsType" TabType
684 :> QueryParamR "list" ListId
685 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
686 :> Put '[JSON] (Versioned NgramsTablePatch)
688 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
689 :> QueryParamR "ngramsType" TabType
690 :> QueryParamR "list" ListId
691 :> "recompute" :> Post '[JSON] Int
693 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
694 :> QueryParamR "ngramsType" TabType
695 :> QueryParamR "list" ListId
696 :> Get '[JSON] Version
698 type TableNgramsApi = TableNgramsApiGet
699 :<|> TableNgramsApiPut
700 :<|> RecomputeScoresNgramsApiGet
701 :<|> "version" :> TableNgramsApiGetVersion
702 :<|> TableNgramsAsyncApi
704 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
708 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
710 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
717 -> Maybe MinSize -> Maybe MaxSize
719 -> Maybe Text -- full text search
720 -> m (VersionedWithCount NgramsTable)
721 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
722 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
724 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
728 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
733 getTableNgramsVersion _nId _tabType listId = currentVersion listId
738 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
739 -- This line above looks like a waste of computation to finally get only the version.
740 -- See the comment about listNgramsChangedSince.
743 -- | Text search is deactivated for now for ngrams by doc only
744 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
746 -> ListId -> Limit -> Maybe Offset
748 -> Maybe MinSize -> Maybe MaxSize
750 -> Maybe Text -- full text search
751 -> m (VersionedWithCount NgramsTable)
752 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
753 ns <- selectNodesWithUsername NodeList userMaster
754 let ngramsType = ngramsTypeFromTabType tabType
755 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
756 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
757 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
761 apiNgramsTableCorpus :: ( GargServerC env err m
763 => NodeId -> ServerT TableNgramsApi m
764 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
766 :<|> scoresRecomputeTableNgrams cId
767 :<|> getTableNgramsVersion cId
768 :<|> apiNgramsAsync cId
770 apiNgramsTableDoc :: ( GargServerC env err m
772 => DocId -> ServerT TableNgramsApi m
773 apiNgramsTableDoc dId = getTableNgramsDoc dId
775 :<|> scoresRecomputeTableNgrams dId
776 :<|> getTableNgramsVersion dId
777 :<|> apiNgramsAsync dId
779 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
780 apiNgramsAsync _dId =
782 JobFunction $ \i log ->
785 printDebug "tableNgramsPostChartsAsync" x
787 in tableNgramsPostChartsAsync i log'
789 -- Did the given list of ngrams changed since the given version?
790 -- The returned value is versioned boolean value, meaning that one always retrieve the
792 -- If the given version is negative then one simply receive the latest version and True.
793 -- Using this function is more precise than simply comparing the latest version number
794 -- with the local version number. Indeed there might be no change to this particular list
795 -- and still the version number has changed because of other lists.
797 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
798 -- * currentVersion: good computation, good bandwidth, bad precision.
799 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
800 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
801 listNgramsChangedSince :: HasNodeStory env err m
802 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
803 listNgramsChangedSince listId ngramsType version
805 Versioned <$> currentVersion listId <*> pure True
807 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)