2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
32 , getTableNgramsCorpus
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
56 , NgramsRepoElement(..)
58 , saveNodeStoryImmediate
74 , VersionedWithCount(..)
76 , listNgramsChangedSince
77 , MinSize, MaxSize, OrderBy, NgramsTable
78 , UpdateTableNgramsCharts
82 import Control.Concurrent
83 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
84 import Control.Monad.Reader
85 import Data.Aeson hiding ((.=))
86 import Data.Either (Either(..))
88 import Data.Map.Strict (Map)
89 import Data.Maybe (fromMaybe)
91 import Data.Ord (Down(..))
92 import Data.Patch.Class (Action(act), Transformable(..), ours)
93 import Data.Swagger hiding (version, patch)
94 import Data.Text (Text, isInfixOf, unpack, pack)
95 import Data.Text.Lazy.IO as DTL
96 import Formatting (hprint, int, (%))
97 import GHC.Generics (Generic)
98 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
99 import Gargantext.API.Admin.Types (HasSettings)
100 import Gargantext.API.Job
101 import Gargantext.API.Ngrams.Types
102 import Gargantext.API.Prelude
103 import Gargantext.Core.NodeStory
104 import Gargantext.Core.Mail.Types (HasMail)
105 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
106 import Gargantext.API.Ngrams.Tools
107 import Gargantext.Database.Action.Flow.Types
108 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
109 import Gargantext.Database.Admin.Config (userMaster)
110 import Gargantext.Database.Admin.Types.Node (NodeType(..))
111 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
112 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
113 import Gargantext.Database.Query.Table.Node (getNode)
114 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
115 import Gargantext.Database.Query.Table.Node.Select
116 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
117 import Gargantext.Prelude hiding (log)
118 import Gargantext.Prelude.Clock (hasTime, getTime)
119 import Prelude (error)
120 import Servant hiding (Patch)
121 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
122 import System.IO (stderr)
123 import Test.QuickCheck (elements)
124 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
125 import qualified Data.Aeson.Text as DAT
126 import qualified Data.List as List
127 import qualified Data.Map.Strict as Map
128 import qualified Data.Map.Strict.Patch as PM
129 import qualified Data.Set as S
130 import qualified Data.Set as Set
131 import qualified Gargantext.API.Metrics as Metrics
132 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
135 -- TODO sequences of modifications (Patchs)
136 type NgramsIdPatch = Patch NgramsId NgramsPatch
138 ngramsPatch :: Int -> NgramsPatch
139 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
141 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
142 toEdit n p = Edit n p
143 ngramsIdPatch :: Patch NgramsId NgramsPatch
144 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
145 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
146 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
149 -- applyPatchBack :: Patch -> IO Patch
150 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
152 ------------------------------------------------------------------------
153 ------------------------------------------------------------------------
154 ------------------------------------------------------------------------
157 -- TODO: Replace.old is ignored which means that if the current list
158 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
159 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
160 -- However this should not happen in non conflicting situations.
161 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
162 mkListsUpdate nt patches =
163 [ (ngramsTypeId nt, ng, listTypeId lt)
164 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
165 , lt <- patch ^.. patch_list . new
168 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
171 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
172 mkChildrenGroups addOrRem nt patches =
173 [ (ngramsTypeId nt, parent, child)
174 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
175 , child <- patch ^.. patch_children . to addOrRem . folded
179 ------------------------------------------------------------------------
181 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
184 saver <- view hasNodeStorySaver
186 Gargantext.Prelude.putStrLn "---- Running node story saver ----"
188 Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
191 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
193 saveNodeStoryImmediate = do
194 saver <- view hasNodeStoryImmediateSaver
196 Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
198 Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
201 listTypeConflictResolution :: ListType -> ListType -> ListType
202 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
205 ngramsStatePatchConflictResolution
206 :: TableNgrams.NgramsType
208 -> ConflictResolutionNgramsPatch
209 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
210 -- = (ours, (const ours, ours), (False, False))
211 -- (False, False) mean here that Mod has always priority.
212 = (ours, (const ours, ours), (True, False))
213 -- (True, False) <- would mean priority to the left (same as ours).
214 -- undefined {- TODO think this through -}, listTypeConflictResolution)
220 -- Insertions are not considered as patches,
221 -- they do not extend history,
222 -- they do not bump version.
223 insertNewOnly :: a -> Maybe b -> a
224 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
225 -- TODO error handling
228 -- TODO refactor with putListNgrams
229 copyListNgrams :: RepoCmdM env err m
230 => NodeId -> NodeId -> NgramsType
232 copyListNgrams srcListId dstListId ngramsType = do
234 liftBase $ modifyMVar_ var $
235 pure . (r_state . at ngramsType %~ (Just . f . something))
238 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
239 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
241 -- TODO refactor with putListNgrams
242 -- The list must be non-empty!
243 -- The added ngrams must be non-existent!
244 addListNgrams :: RepoCmdM env err m
245 => NodeId -> NgramsType
246 -> [NgramsElement] -> m ()
247 addListNgrams listId ngramsType nes = do
249 liftBase $ modifyMVar_ var $
250 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
253 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
256 -- | TODO: incr the Version number
257 -- && should use patch
260 setListNgrams :: HasNodeStory env err m
262 -> TableNgrams.NgramsType
263 -> Map NgramsTerm NgramsRepoElement
265 setListNgrams listId ngramsType ns = do
266 -- printDebug "[setListNgrams]" (listId, ngramsType)
267 getter <- view hasNodeStory
268 var <- liftBase $ (getter ^. nse_getter) [listId]
269 liftBase $ modifyMVar_ var $
279 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
280 newNgramsFromNgramsStatePatch p =
281 [ text2ngrams (unNgramsTerm n)
282 | (n,np) <- p ^.. _PatchMap
283 -- . each . _PatchMap
284 . each . _NgramsTablePatch
285 . _PatchMap . ifolded . withIndex
286 , _ <- np ^.. patch_new . _Just
292 commitStatePatch :: (HasNodeStory env err m, HasMail env)
294 -> Versioned NgramsStatePatch'
295 -> m (Versioned NgramsStatePatch')
296 commitStatePatch listId (Versioned _p_version p) = do
297 -- printDebug "[commitStatePatch]" listId
298 var <- getNodeStoryVar [listId]
299 vq' <- liftBase $ modifyMVar var $ \ns -> do
301 a = ns ^. unNodeStory . at listId . _Just
302 -- apply patches from version p_version to a ^. a_version
304 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
305 q = mconcat $ a ^. a_history
307 printDebug "transformWith" (p,q)
310 (p', q') = transformWith ngramsStatePatchConflictResolution p q
311 a' = a & a_version +~ 1
313 & a_history %~ (p' :)
316 -- Ideally we would like to check these properties. However:
317 -- * They should be checked only to debug the code. The client data
318 -- should be able to trigger these.
319 -- * What kind of error should they throw (we are in IO here)?
320 -- * Should we keep modifyMVar?
321 -- * Should we throw the validation in an Exception, catch it around
322 -- modifyMVar and throw it back as an Error?
323 assertValid $ transformable p q
324 assertValid $ applicable p' (r ^. r_state)
326 printDebug "[commitStatePatch] a version" (a ^. a_version)
327 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
328 pure ( ns & unNodeStory . at listId .~ (Just a')
329 , Versioned (a' ^. a_version) q'
333 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
339 -- This is a special case of tableNgramsPut where the input patch is empty.
340 tableNgramsPull :: HasNodeStory env err m
342 -> TableNgrams.NgramsType
344 -> m (Versioned NgramsTablePatch)
345 tableNgramsPull listId ngramsType p_version = do
346 printDebug "[tableNgramsPull]" (listId, ngramsType)
347 var <- getNodeStoryVar [listId]
348 r <- liftBase $ readMVar var
351 a = r ^. unNodeStory . at listId . _Just
352 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
353 q_table = q ^. _PatchMap . at ngramsType . _Just
355 pure (Versioned (a ^. a_version) q_table)
360 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
361 -- Apply the given patch to the DB and returns the patch to be applied on the
364 tableNgramsPut :: ( HasNodeStory env err m
365 , HasInvalidError err
371 -> Versioned NgramsTablePatch
372 -> m (Versioned NgramsTablePatch)
373 tableNgramsPut tabType listId (Versioned p_version p_table)
374 | p_table == mempty = do
375 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
376 let ngramsType = ngramsTypeFromTabType tabType
377 tableNgramsPull listId ngramsType p_version
380 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
381 let ngramsType = ngramsTypeFromTabType tabType
382 (p, p_validity) = PM.singleton ngramsType p_table
384 assertValid p_validity
386 ret <- commitStatePatch listId (Versioned p_version p)
387 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
393 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
398 => UpdateTableNgramsCharts
401 tableNgramsPostChartsAsync utn logStatus = do
402 let tabType = utn ^. utn_tab_type
403 let listId = utn ^. utn_list_id
405 node <- getNode listId
406 let nId = node ^. node_id
407 _uId = node ^. node_user_id
408 mCId = node ^. node_parent_id
410 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
411 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
415 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
416 pure $ jobLogFail $ jobLogInit 1
420 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
421 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
423 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
428 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
429 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
430 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
432 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
433 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
435 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
436 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
438 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
443 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
444 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
446 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
451 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
452 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
455 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
457 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
459 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
461 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
463 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
465 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
471 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
472 pure $ jobLogFail $ jobLogInit 1
475 { _ne_list :: ListType
476 If we merge the parents/children we can potentially create cycles!
477 , _ne_parent :: Maybe NgramsTerm
478 , _ne_children :: MSet NgramsTerm
482 getNgramsTableMap :: HasNodeStory env err m
484 -> TableNgrams.NgramsType
485 -> m (Versioned NgramsTableMap)
486 getNgramsTableMap nodeId ngramsType = do
487 v <- getNodeStoryVar [nodeId]
488 repo <- liftBase $ readMVar v
489 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
490 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
493 dumpJsonTableMap :: HasNodeStory env err m
496 -> TableNgrams.NgramsType
498 dumpJsonTableMap fpath nodeId ngramsType = do
499 m <- getNgramsTableMap nodeId ngramsType
500 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
507 -- | TODO Errors management
508 -- TODO: polymorphic for Annuaire or Corpus or ...
509 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
510 -- TODO: should take only one ListId
513 getTableNgrams :: forall env err m.
514 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
515 => NodeType -> NodeId -> TabType
516 -> ListId -> Limit -> Maybe Offset
518 -> Maybe MinSize -> Maybe MaxSize
520 -> (NgramsTerm -> Bool)
521 -> m (VersionedWithCount NgramsTable)
522 getTableNgrams _nType nId tabType listId limit_ offset
523 listType minSize maxSize orderBy searchQuery = do
526 -- lIds <- selectNodesWithUsername NodeList userMaster
528 ngramsType = ngramsTypeFromTabType tabType
529 offset' = maybe 0 identity offset
530 listType' = maybe (const True) (==) listType
531 minSize' = maybe (const True) (<=) minSize
532 maxSize' = maybe (const True) (>=) maxSize
534 selected_node n = minSize' s
536 && searchQuery (n ^. ne_ngrams)
537 && listType' (n ^. ne_list)
541 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
543 ---------------------------------------
544 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
545 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
546 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
547 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
548 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
550 ---------------------------------------
551 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
552 filteredNodes tableMap = rootOf <$> list & filter selected_node
554 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
558 list = tableMap ^.. each
560 ---------------------------------------
561 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
562 selectAndPaginate tableMap = roots <> inners
564 list = tableMap ^.. each
565 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
569 selected_nodes = list & take limit_
571 . filter selected_node
572 . sortOnOrder orderBy
573 roots = rootOf <$> selected_nodes
574 rootsSet = Set.fromList (_ne_ngrams <$> roots)
575 inners = list & filter (selected_inner rootsSet)
577 ---------------------------------------
578 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
579 setScores False table = pure table
580 setScores True table = do
581 let ngrams_terms = table ^.. each . ne_ngrams
582 -- printDebug "ngrams_terms" ngrams_terms
584 occurrences <- getOccByNgramsOnlyFast nId
587 --printDebug "occurrences" occurrences
589 liftBase $ hprint stderr
590 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
591 (length ngrams_terms) t1 t2
593 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
595 pure $ table & each %~ setOcc
596 ---------------------------------------
598 -- lists <- catMaybes <$> listsWith userMaster
599 -- trace (show lists) $
600 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
603 let scoresNeeded = needsScores orderBy
604 tableMap1 <- getNgramsTableMap listId ngramsType
607 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
608 . Map.mapWithKey ngramsElementFromRepo
610 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
613 let fltrCount = length $ fltr ^. v_data . _NgramsTable
616 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
617 . setScores (not scoresNeeded)
620 liftBase $ hprint stderr
621 ("getTableNgrams total=" % hasTime
625 % " sql=" % (if scoresNeeded then "map2" else "map3")
627 ) t0 t3 t0 t1 t1 t2 t2 t3
628 pure $ toVersionedWithCount fltrCount tableMap3
632 scoresRecomputeTableNgrams :: forall env err m.
633 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
634 => NodeId -> TabType -> ListId -> m Int
635 scoresRecomputeTableNgrams nId tabType listId = do
636 tableMap <- getNgramsTableMap listId ngramsType
637 _ <- tableMap & v_data %%~ setScores
638 . Map.mapWithKey ngramsElementFromRepo
642 ngramsType = ngramsTypeFromTabType tabType
644 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
646 occurrences <- getOccByNgramsOnlyFast nId
650 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
652 pure $ table & each %~ setOcc
657 -- TODO: find a better place for the code above, All APIs stay here
659 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
660 deriving (Generic, Enum, Bounded, Read, Show)
662 instance FromHttpApiData OrderBy
664 parseUrlPiece "TermAsc" = pure TermAsc
665 parseUrlPiece "TermDesc" = pure TermDesc
666 parseUrlPiece "ScoreAsc" = pure ScoreAsc
667 parseUrlPiece "ScoreDesc" = pure ScoreDesc
668 parseUrlPiece _ = Left "Unexpected value of OrderBy"
670 instance ToHttpApiData OrderBy where
671 toUrlPiece = pack . show
673 instance ToParamSchema OrderBy
674 instance FromJSON OrderBy
675 instance ToJSON OrderBy
676 instance ToSchema OrderBy
677 instance Arbitrary OrderBy
679 arbitrary = elements [minBound..maxBound]
681 needsScores :: Maybe OrderBy -> Bool
682 needsScores (Just ScoreAsc) = True
683 needsScores (Just ScoreDesc) = True
684 needsScores _ = False
686 type TableNgramsApiGet = Summary " Table Ngrams API Get"
687 :> QueryParamR "ngramsType" TabType
688 :> QueryParamR "list" ListId
689 :> QueryParamR "limit" Limit
690 :> QueryParam "offset" Offset
691 :> QueryParam "listType" ListType
692 :> QueryParam "minTermSize" MinSize
693 :> QueryParam "maxTermSize" MaxSize
694 :> QueryParam "orderBy" OrderBy
695 :> QueryParam "search" Text
696 :> Get '[JSON] (VersionedWithCount NgramsTable)
698 type TableNgramsApiPut = Summary " Table Ngrams API Change"
699 :> QueryParamR "ngramsType" TabType
700 :> QueryParamR "list" ListId
701 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
702 :> Put '[JSON] (Versioned NgramsTablePatch)
704 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
705 :> QueryParamR "ngramsType" TabType
706 :> QueryParamR "list" ListId
707 :> "recompute" :> Post '[JSON] Int
709 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
710 :> QueryParamR "ngramsType" TabType
711 :> QueryParamR "list" ListId
712 :> Get '[JSON] Version
714 type TableNgramsApi = TableNgramsApiGet
715 :<|> TableNgramsApiPut
716 :<|> RecomputeScoresNgramsApiGet
717 :<|> "version" :> TableNgramsApiGetVersion
718 :<|> TableNgramsAsyncApi
720 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
724 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
726 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
733 -> Maybe MinSize -> Maybe MaxSize
735 -> Maybe Text -- full text search
736 -> m (VersionedWithCount NgramsTable)
737 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
738 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
740 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
744 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
749 getTableNgramsVersion _nId _tabType listId = currentVersion listId
754 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
755 -- This line above looks like a waste of computation to finally get only the version.
756 -- See the comment about listNgramsChangedSince.
759 -- | Text search is deactivated for now for ngrams by doc only
760 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
762 -> ListId -> Limit -> Maybe Offset
764 -> Maybe MinSize -> Maybe MaxSize
766 -> Maybe Text -- full text search
767 -> m (VersionedWithCount NgramsTable)
768 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
769 ns <- selectNodesWithUsername NodeList userMaster
770 let ngramsType = ngramsTypeFromTabType tabType
771 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
772 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
773 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
777 apiNgramsTableCorpus :: ( GargServerC env err m
779 => NodeId -> ServerT TableNgramsApi m
780 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
782 :<|> scoresRecomputeTableNgrams cId
783 :<|> getTableNgramsVersion cId
784 :<|> apiNgramsAsync cId
786 apiNgramsTableDoc :: ( GargServerC env err m
788 => DocId -> ServerT TableNgramsApi m
789 apiNgramsTableDoc dId = getTableNgramsDoc dId
791 :<|> scoresRecomputeTableNgrams dId
792 :<|> getTableNgramsVersion dId
793 :<|> apiNgramsAsync dId
795 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
796 apiNgramsAsync _dId =
798 JobFunction $ \i log ->
801 printDebug "tableNgramsPostChartsAsync" x
803 in tableNgramsPostChartsAsync i log'
805 -- Did the given list of ngrams changed since the given version?
806 -- The returned value is versioned boolean value, meaning that one always retrieve the
808 -- If the given version is negative then one simply receive the latest version and True.
809 -- Using this function is more precise than simply comparing the latest version number
810 -- with the local version number. Indeed there might be no change to this particular list
811 -- and still the version number has changed because of other lists.
813 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
814 -- * currentVersion: good computation, good bandwidth, bad precision.
815 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
816 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
817 listNgramsChangedSince :: HasNodeStory env err m
818 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
819 listNgramsChangedSince listId ngramsType version
821 Versioned <$> currentVersion listId <*> pure True
823 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)