2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
32 , getTableNgramsCorpus
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
56 , NgramsRepoElement(..)
58 , saveNodeStoryImmediate
74 , VersionedWithCount(..)
76 , listNgramsChangedSince
77 , MinSize, MaxSize, OrderBy, NgramsTable
78 , UpdateTableNgramsCharts
82 import Control.Concurrent
83 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
84 import Control.Monad.Reader
85 import Data.Aeson hiding ((.=))
86 import Data.Either (Either(..))
88 import Data.Map.Strict (Map)
89 import Data.Maybe (fromMaybe)
91 import Data.Ord (Down(..))
92 import Data.Patch.Class (Action(act), Transformable(..), ours)
93 import Data.Swagger hiding (version, patch)
94 import Data.Text (Text, isInfixOf, unpack, pack)
95 import Data.Text.Lazy.IO as DTL
96 import Formatting (hprint, int, (%))
97 import GHC.Generics (Generic)
98 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
99 import Gargantext.API.Admin.Types (HasSettings)
100 import Gargantext.API.Job
101 import Gargantext.API.Ngrams.Types
102 import Gargantext.API.Prelude
103 import Gargantext.Core.NodeStory
104 import Gargantext.Core.Mail.Types (HasMail)
105 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
106 import Gargantext.API.Ngrams.Tools
107 import Gargantext.Database.Action.Flow.Types
108 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
109 import Gargantext.Database.Admin.Config (userMaster)
110 import Gargantext.Database.Admin.Types.Node (NodeType(..))
111 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
112 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
113 import Gargantext.Database.Query.Table.Node (getNode)
114 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
115 import Gargantext.Database.Query.Table.Node.Select
116 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
117 import Gargantext.Prelude hiding (log)
118 import Gargantext.Prelude.Clock (hasTime, getTime)
119 import Prelude (error)
120 import Servant hiding (Patch)
121 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
122 import System.IO (stderr)
123 import Test.QuickCheck (elements)
124 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
125 import qualified Data.Aeson.Text as DAT
126 import qualified Data.List as List
127 import qualified Data.Map.Strict as Map
128 import qualified Data.Map.Strict.Patch as PM
129 import qualified Data.Set as S
130 import qualified Data.Set as Set
131 import qualified Gargantext.API.Metrics as Metrics
132 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
135 -- TODO sequences of modifications (Patchs)
136 type NgramsIdPatch = Patch NgramsId NgramsPatch
138 ngramsPatch :: Int -> NgramsPatch
139 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
141 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
142 toEdit n p = Edit n p
143 ngramsIdPatch :: Patch NgramsId NgramsPatch
144 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
145 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
146 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
149 -- applyPatchBack :: Patch -> IO Patch
150 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
152 ------------------------------------------------------------------------
153 ------------------------------------------------------------------------
154 ------------------------------------------------------------------------
157 -- TODO: Replace.old is ignored which means that if the current list
158 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
159 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
160 -- However this should not happen in non conflicting situations.
161 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
162 mkListsUpdate nt patches =
163 [ (ngramsTypeId nt, ng, listTypeId lt)
164 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
165 , lt <- patch ^.. patch_list . new
168 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
171 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
172 mkChildrenGroups addOrRem nt patches =
173 [ (ngramsTypeId nt, parent, child)
174 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
175 , child <- patch ^.. patch_children . to addOrRem . folded
179 ------------------------------------------------------------------------
181 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
184 saver <- view hasNodeStorySaver
186 Gargantext.Prelude.putStrLn "---- Running node story saver ----"
188 Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
191 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
193 saveNodeStoryImmediate = do
194 saver <- view hasNodeStoryImmediateSaver
196 Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
198 Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
201 listTypeConflictResolution :: ListType -> ListType -> ListType
202 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
205 ngramsStatePatchConflictResolution
206 :: TableNgrams.NgramsType
208 -> ConflictResolutionNgramsPatch
209 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
210 = (ours, (const ours, ours), (False, False))
211 -- (False, False) mean here that Mod has always priority.
212 -- (True, False) <- would mean priority to the left (same as ours).
213 -- undefined {- TODO think this through -}, listTypeConflictResolution)
219 -- Insertions are not considered as patches,
220 -- they do not extend history,
221 -- they do not bump version.
222 insertNewOnly :: a -> Maybe b -> a
223 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
224 -- TODO error handling
227 -- TODO refactor with putListNgrams
228 copyListNgrams :: RepoCmdM env err m
229 => NodeId -> NodeId -> NgramsType
231 copyListNgrams srcListId dstListId ngramsType = do
233 liftBase $ modifyMVar_ var $
234 pure . (r_state . at ngramsType %~ (Just . f . something))
237 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
238 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
240 -- TODO refactor with putListNgrams
241 -- The list must be non-empty!
242 -- The added ngrams must be non-existent!
243 addListNgrams :: RepoCmdM env err m
244 => NodeId -> NgramsType
245 -> [NgramsElement] -> m ()
246 addListNgrams listId ngramsType nes = do
248 liftBase $ modifyMVar_ var $
249 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
252 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
255 -- | TODO: incr the Version number
256 -- && should use patch
259 setListNgrams :: HasNodeStory env err m
261 -> TableNgrams.NgramsType
262 -> Map NgramsTerm NgramsRepoElement
264 setListNgrams listId ngramsType ns = do
265 -- printDebug "[setListNgrams]" (listId, ngramsType)
266 getter <- view hasNodeStory
267 var <- liftBase $ (getter ^. nse_getter) [listId]
268 liftBase $ modifyMVar_ var $
278 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
279 newNgramsFromNgramsStatePatch p =
280 [ text2ngrams (unNgramsTerm n)
281 | (n,np) <- p ^.. _PatchMap
282 -- . each . _PatchMap
283 . each . _NgramsTablePatch
284 . _PatchMap . ifolded . withIndex
285 , _ <- np ^.. patch_new . _Just
291 commitStatePatch :: (HasNodeStory env err m, HasMail env)
293 -> Versioned NgramsStatePatch'
294 -> m (Versioned NgramsStatePatch')
295 commitStatePatch listId (Versioned _p_version p) = do
296 -- printDebug "[commitStatePatch]" listId
297 var <- getNodeStoryVar [listId]
298 vq' <- liftBase $ modifyMVar var $ \ns -> do
300 a = ns ^. unNodeStory . at listId . _Just
301 -- apply patches from version p_version to a ^. a_version
303 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
304 q = mconcat $ a ^. a_history
305 (p', q') = transformWith ngramsStatePatchConflictResolution p q
306 a' = a & a_version +~ 1
308 & a_history %~ (p' :)
311 -- Ideally we would like to check these properties. However:
312 -- * They should be checked only to debug the code. The client data
313 -- should be able to trigger these.
314 -- * What kind of error should they throw (we are in IO here)?
315 -- * Should we keep modifyMVar?
316 -- * Should we throw the validation in an Exception, catch it around
317 -- modifyMVar and throw it back as an Error?
318 assertValid $ transformable p q
319 assertValid $ applicable p' (r ^. r_state)
321 printDebug "[commitStatePatch] a version" (a ^. a_version)
322 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
323 pure ( ns & unNodeStory . at listId .~ (Just a')
324 , Versioned (a' ^. a_version) q'
328 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
334 -- This is a special case of tableNgramsPut where the input patch is empty.
335 tableNgramsPull :: HasNodeStory env err m
337 -> TableNgrams.NgramsType
339 -> m (Versioned NgramsTablePatch)
340 tableNgramsPull listId ngramsType p_version = do
341 printDebug "[tableNgramsPull]" (listId, ngramsType)
342 var <- getNodeStoryVar [listId]
343 r <- liftBase $ readMVar var
346 a = r ^. unNodeStory . at listId . _Just
347 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
348 q_table = q ^. _PatchMap . at ngramsType . _Just
350 pure (Versioned (a ^. a_version) q_table)
355 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
356 -- Apply the given patch to the DB and returns the patch to be applied on the
359 tableNgramsPut :: ( HasNodeStory env err m
360 , HasInvalidError err
366 -> Versioned NgramsTablePatch
367 -> m (Versioned NgramsTablePatch)
368 tableNgramsPut tabType listId (Versioned p_version p_table)
369 | p_table == mempty = do
370 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
371 let ngramsType = ngramsTypeFromTabType tabType
372 tableNgramsPull listId ngramsType p_version
375 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
376 let ngramsType = ngramsTypeFromTabType tabType
377 (p, p_validity) = PM.singleton ngramsType p_table
379 assertValid p_validity
381 ret <- commitStatePatch listId (Versioned p_version p)
382 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
388 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
393 => UpdateTableNgramsCharts
396 tableNgramsPostChartsAsync utn logStatus = do
397 let tabType = utn ^. utn_tab_type
398 let listId = utn ^. utn_list_id
400 node <- getNode listId
401 let nId = node ^. node_id
402 _uId = node ^. node_user_id
403 mCId = node ^. node_parent_id
405 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
406 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
410 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
411 pure $ jobLogFail $ jobLogInit 1
415 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
416 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
418 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
423 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
424 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
425 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
427 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
428 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
430 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
431 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
433 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
438 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
439 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
441 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
446 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
447 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
450 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
452 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
454 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
456 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
458 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
460 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
466 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
467 pure $ jobLogFail $ jobLogInit 1
470 { _ne_list :: ListType
471 If we merge the parents/children we can potentially create cycles!
472 , _ne_parent :: Maybe NgramsTerm
473 , _ne_children :: MSet NgramsTerm
477 getNgramsTableMap :: HasNodeStory env err m
479 -> TableNgrams.NgramsType
480 -> m (Versioned NgramsTableMap)
481 getNgramsTableMap nodeId ngramsType = do
482 v <- getNodeStoryVar [nodeId]
483 repo <- liftBase $ readMVar v
484 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
485 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
488 dumpJsonTableMap :: HasNodeStory env err m
491 -> TableNgrams.NgramsType
493 dumpJsonTableMap fpath nodeId ngramsType = do
494 m <- getNgramsTableMap nodeId ngramsType
495 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
502 -- | TODO Errors management
503 -- TODO: polymorphic for Annuaire or Corpus or ...
504 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
505 -- TODO: should take only one ListId
508 getTableNgrams :: forall env err m.
509 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
510 => NodeType -> NodeId -> TabType
511 -> ListId -> Limit -> Maybe Offset
513 -> Maybe MinSize -> Maybe MaxSize
515 -> (NgramsTerm -> Bool)
516 -> m (VersionedWithCount NgramsTable)
517 getTableNgrams _nType nId tabType listId limit_ offset
518 listType minSize maxSize orderBy searchQuery = do
521 -- lIds <- selectNodesWithUsername NodeList userMaster
523 ngramsType = ngramsTypeFromTabType tabType
524 offset' = maybe 0 identity offset
525 listType' = maybe (const True) (==) listType
526 minSize' = maybe (const True) (<=) minSize
527 maxSize' = maybe (const True) (>=) maxSize
529 selected_node n = minSize' s
531 && searchQuery (n ^. ne_ngrams)
532 && listType' (n ^. ne_list)
536 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
538 ---------------------------------------
539 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
540 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
541 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
542 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
543 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
545 ---------------------------------------
546 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
547 filteredNodes tableMap = rootOf <$> list & filter selected_node
549 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
553 list = tableMap ^.. each
555 ---------------------------------------
556 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
557 selectAndPaginate tableMap = roots <> inners
559 list = tableMap ^.. each
560 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
564 selected_nodes = list & take limit_
566 . filter selected_node
567 . sortOnOrder orderBy
568 roots = rootOf <$> selected_nodes
569 rootsSet = Set.fromList (_ne_ngrams <$> roots)
570 inners = list & filter (selected_inner rootsSet)
572 ---------------------------------------
573 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
574 setScores False table = pure table
575 setScores True table = do
576 let ngrams_terms = table ^.. each . ne_ngrams
577 -- printDebug "ngrams_terms" ngrams_terms
579 occurrences <- getOccByNgramsOnlyFast' nId
583 --printDebug "occurrences" occurrences
585 liftBase $ hprint stderr
586 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
587 (length ngrams_terms) t1 t2
589 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
591 pure $ table & each %~ setOcc
592 ---------------------------------------
594 -- lists <- catMaybes <$> listsWith userMaster
595 -- trace (show lists) $
596 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
599 let scoresNeeded = needsScores orderBy
600 tableMap1 <- getNgramsTableMap listId ngramsType
603 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
604 . Map.mapWithKey ngramsElementFromRepo
606 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
609 let fltrCount = length $ fltr ^. v_data . _NgramsTable
612 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
613 . setScores (not scoresNeeded)
616 liftBase $ hprint stderr
617 ("getTableNgrams total=" % hasTime
621 % " sql=" % (if scoresNeeded then "map2" else "map3")
623 ) t0 t3 t0 t1 t1 t2 t2 t3
624 pure $ toVersionedWithCount fltrCount tableMap3
628 scoresRecomputeTableNgrams :: forall env err m.
629 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
630 => NodeId -> TabType -> ListId -> m Int
631 scoresRecomputeTableNgrams nId tabType listId = do
632 tableMap <- getNgramsTableMap listId ngramsType
633 _ <- tableMap & v_data %%~ setScores
634 . Map.mapWithKey ngramsElementFromRepo
638 ngramsType = ngramsTypeFromTabType tabType
640 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
642 let ngrams_terms = table ^.. each . ne_ngrams
643 occurrences <- getOccByNgramsOnlyFast' nId
648 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
650 pure $ table & each %~ setOcc
657 -- TODO: find a better place for the code above, All APIs stay here
659 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
660 deriving (Generic, Enum, Bounded, Read, Show)
662 instance FromHttpApiData OrderBy
664 parseUrlPiece "TermAsc" = pure TermAsc
665 parseUrlPiece "TermDesc" = pure TermDesc
666 parseUrlPiece "ScoreAsc" = pure ScoreAsc
667 parseUrlPiece "ScoreDesc" = pure ScoreDesc
668 parseUrlPiece _ = Left "Unexpected value of OrderBy"
670 instance ToHttpApiData OrderBy where
671 toUrlPiece = pack . show
673 instance ToParamSchema OrderBy
674 instance FromJSON OrderBy
675 instance ToJSON OrderBy
676 instance ToSchema OrderBy
677 instance Arbitrary OrderBy
679 arbitrary = elements [minBound..maxBound]
681 needsScores :: Maybe OrderBy -> Bool
682 needsScores (Just ScoreAsc) = True
683 needsScores (Just ScoreDesc) = True
684 needsScores _ = False
686 type TableNgramsApiGet = Summary " Table Ngrams API Get"
687 :> QueryParamR "ngramsType" TabType
688 :> QueryParamR "list" ListId
689 :> QueryParamR "limit" Limit
690 :> QueryParam "offset" Offset
691 :> QueryParam "listType" ListType
692 :> QueryParam "minTermSize" MinSize
693 :> QueryParam "maxTermSize" MaxSize
694 :> QueryParam "orderBy" OrderBy
695 :> QueryParam "search" Text
696 :> Get '[JSON] (VersionedWithCount NgramsTable)
698 type TableNgramsApiPut = Summary " Table Ngrams API Change"
699 :> QueryParamR "ngramsType" TabType
700 :> QueryParamR "list" ListId
701 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
702 :> Put '[JSON] (Versioned NgramsTablePatch)
704 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
705 :> QueryParamR "ngramsType" TabType
706 :> QueryParamR "list" ListId
707 :> "recompute" :> Post '[JSON] Int
709 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
710 :> QueryParamR "ngramsType" TabType
711 :> QueryParamR "list" ListId
712 :> Get '[JSON] Version
714 type TableNgramsApi = TableNgramsApiGet
715 :<|> TableNgramsApiPut
716 :<|> RecomputeScoresNgramsApiGet
717 :<|> "version" :> TableNgramsApiGetVersion
718 :<|> TableNgramsAsyncApi
720 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
724 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
726 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
733 -> Maybe MinSize -> Maybe MaxSize
735 -> Maybe Text -- full text search
736 -> m (VersionedWithCount NgramsTable)
737 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
738 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
740 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
744 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
749 getTableNgramsVersion _nId _tabType listId = currentVersion listId
754 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
755 -- This line above looks like a waste of computation to finally get only the version.
756 -- See the comment about listNgramsChangedSince.
759 -- | Text search is deactivated for now for ngrams by doc only
760 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
762 -> ListId -> Limit -> Maybe Offset
764 -> Maybe MinSize -> Maybe MaxSize
766 -> Maybe Text -- full text search
767 -> m (VersionedWithCount NgramsTable)
768 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
769 ns <- selectNodesWithUsername NodeList userMaster
770 let ngramsType = ngramsTypeFromTabType tabType
771 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
772 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
773 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
777 apiNgramsTableCorpus :: ( GargServerC env err m
779 => NodeId -> ServerT TableNgramsApi m
780 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
782 :<|> scoresRecomputeTableNgrams cId
783 :<|> getTableNgramsVersion cId
784 :<|> apiNgramsAsync cId
786 apiNgramsTableDoc :: ( GargServerC env err m
788 => DocId -> ServerT TableNgramsApi m
789 apiNgramsTableDoc dId = getTableNgramsDoc dId
791 :<|> scoresRecomputeTableNgrams dId
792 :<|> getTableNgramsVersion dId
793 :<|> apiNgramsAsync dId
795 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
796 apiNgramsAsync _dId =
798 JobFunction $ \i log ->
801 printDebug "tableNgramsPostChartsAsync" x
803 in tableNgramsPostChartsAsync i log'
805 -- Did the given list of ngrams changed since the given version?
806 -- The returned value is versioned boolean value, meaning that one always retrieve the
808 -- If the given version is negative then one simply receive the latest version and True.
809 -- Using this function is more precise than simply comparing the latest version number
810 -- with the local version number. Indeed there might be no change to this particular list
811 -- and still the version number has changed because of other lists.
813 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
814 -- * currentVersion: good computation, good bandwidth, bad precision.
815 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
816 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
817 listNgramsChangedSince :: HasNodeStory env err m
818 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
819 listNgramsChangedSince listId ngramsType version
821 Versioned <$> currentVersion listId <*> pure True
823 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)