2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
32 , getTableNgramsCorpus
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
56 , NgramsRepoElement(..)
58 , saveNodeStoryImmediate
73 , setNgramsTableScores
77 , VersionedWithCount(..)
79 , listNgramsChangedSince
80 , MinSize, MaxSize, OrderBy, NgramsTable
81 , UpdateTableNgramsCharts
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import Data.Either (Either(..))
91 import Data.Map.Strict (Map)
92 import Data.Maybe (fromMaybe)
94 import Data.Ord (Down(..))
95 import Data.Patch.Class (Action(act), Transformable(..), ours)
96 import Data.Swagger hiding (version, patch)
97 import Data.Text (Text, isInfixOf, unpack, pack)
98 import Data.Text.Lazy.IO as DTL
99 import Formatting (hprint, int, (%))
100 import GHC.Generics (Generic)
101 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
102 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
103 import Gargantext.API.Admin.Types (HasSettings)
104 import Gargantext.API.Job
105 import Gargantext.API.Ngrams.Types
106 import Gargantext.API.Prelude
107 import Gargantext.Core.NodeStory
108 import Gargantext.Core.Mail.Types (HasMail)
109 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
110 import Gargantext.API.Ngrams.Tools
111 import Gargantext.Database.Action.Flow.Types
112 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
113 import Gargantext.Database.Admin.Config (userMaster)
114 import Gargantext.Database.Admin.Types.Node (NodeType(..))
115 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
116 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
117 import Gargantext.Database.Query.Table.Node (getNode)
118 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
119 import Gargantext.Database.Query.Table.Node.Select
120 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
121 import Gargantext.Prelude hiding (log)
122 import Gargantext.Prelude.Clock (hasTime, getTime)
123 import Prelude (error)
124 import Servant hiding (Patch)
125 import Gargantext.Utils.Jobs (serveJobsAPI)
126 import System.IO (stderr)
127 import Test.QuickCheck (elements)
128 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
129 import qualified Data.Aeson.Text as DAT
130 import qualified Data.List as List
131 import qualified Data.Map.Strict as Map
132 import qualified Data.Map.Strict.Patch as PM
133 import qualified Data.Set as S
134 import qualified Data.Set as Set
135 import qualified Gargantext.API.Metrics as Metrics
136 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
139 -- TODO sequences of modifications (Patchs)
140 type NgramsIdPatch = Patch NgramsId NgramsPatch
142 ngramsPatch :: Int -> NgramsPatch
143 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
145 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
146 toEdit n p = Edit n p
147 ngramsIdPatch :: Patch NgramsId NgramsPatch
148 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
149 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
150 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
153 -- applyPatchBack :: Patch -> IO Patch
154 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158 ------------------------------------------------------------------------
161 -- TODO: Replace.old is ignored which means that if the current list
162 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
163 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
164 -- However this should not happen in non conflicting situations.
165 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
166 mkListsUpdate nt patches =
167 [ (ngramsTypeId nt, ng, listTypeId lt)
168 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
169 , lt <- patch ^.. patch_list . new
172 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
175 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
176 mkChildrenGroups addOrRem nt patches =
177 [ (ngramsTypeId nt, parent, child)
178 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
179 , child <- patch ^.. patch_children . to addOrRem . folded
183 ------------------------------------------------------------------------
185 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
188 saver <- view hasNodeStorySaver
190 Gargantext.Prelude.putStrLn "---- Running node story saver ----"
192 Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
195 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
197 saveNodeStoryImmediate = do
198 saver <- view hasNodeStoryImmediateSaver
200 Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
202 Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
205 listTypeConflictResolution :: ListType -> ListType -> ListType
206 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
209 ngramsStatePatchConflictResolution
210 :: TableNgrams.NgramsType
212 -> ConflictResolutionNgramsPatch
213 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
214 -- = (ours, (const ours, ours), (False, False))
215 -- (False, False) mean here that Mod has always priority.
216 = (ours, (const ours, ours), (True, False))
217 -- (True, False) <- would mean priority to the left (same as ours).
218 -- undefined {- TODO think this through -}, listTypeConflictResolution)
224 -- Insertions are not considered as patches,
225 -- they do not extend history,
226 -- they do not bump version.
227 insertNewOnly :: a -> Maybe b -> a
228 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
229 -- TODO error handling
232 -- TODO refactor with putListNgrams
233 copyListNgrams :: RepoCmdM env err m
234 => NodeId -> NodeId -> NgramsType
236 copyListNgrams srcListId dstListId ngramsType = do
238 liftBase $ modifyMVar_ var $
239 pure . (r_state . at ngramsType %~ (Just . f . something))
242 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
243 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
245 -- TODO refactor with putListNgrams
246 -- The list must be non-empty!
247 -- The added ngrams must be non-existent!
248 addListNgrams :: RepoCmdM env err m
249 => NodeId -> NgramsType
250 -> [NgramsElement] -> m ()
251 addListNgrams listId ngramsType nes = do
253 liftBase $ modifyMVar_ var $
254 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
257 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
260 -- | TODO: incr the Version number
261 -- && should use patch
264 setListNgrams :: HasNodeStory env err m
266 -> TableNgrams.NgramsType
267 -> Map NgramsTerm NgramsRepoElement
269 setListNgrams listId ngramsType ns = do
270 -- printDebug "[setListNgrams]" (listId, ngramsType)
271 getter <- view hasNodeStory
272 var <- liftBase $ (getter ^. nse_getter) [listId]
273 liftBase $ modifyMVar_ var $
283 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
284 newNgramsFromNgramsStatePatch p =
285 [ text2ngrams (unNgramsTerm n)
286 | (n,np) <- p ^.. _PatchMap
287 -- . each . _PatchMap
288 . each . _NgramsTablePatch
289 . _PatchMap . ifolded . withIndex
290 , _ <- np ^.. patch_new . _Just
296 commitStatePatch :: (HasNodeStory env err m, HasMail env)
298 -> Versioned NgramsStatePatch'
299 -> m (Versioned NgramsStatePatch')
300 commitStatePatch listId (Versioned _p_version p) = do
301 -- printDebug "[commitStatePatch]" listId
302 var <- getNodeStoryVar [listId]
303 vq' <- liftBase $ modifyMVar var $ \ns -> do
305 a = ns ^. unNodeStory . at listId . _Just
306 -- apply patches from version p_version to a ^. a_version
308 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
309 q = mconcat $ a ^. a_history
311 printDebug "transformWith" (p,q)
314 (p', q') = transformWith ngramsStatePatchConflictResolution p q
315 a' = a & a_version +~ 1
317 & a_history %~ (p' :)
320 -- Ideally we would like to check these properties. However:
321 -- * They should be checked only to debug the code. The client data
322 -- should be able to trigger these.
323 -- * What kind of error should they throw (we are in IO here)?
324 -- * Should we keep modifyMVar?
325 -- * Should we throw the validation in an Exception, catch it around
326 -- modifyMVar and throw it back as an Error?
327 assertValid $ transformable p q
328 assertValid $ applicable p' (r ^. r_state)
330 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
331 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
332 pure ( ns & unNodeStory . at listId .~ (Just a')
333 , Versioned (a' ^. a_version) q'
337 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
343 -- This is a special case of tableNgramsPut where the input patch is empty.
344 tableNgramsPull :: HasNodeStory env err m
346 -> TableNgrams.NgramsType
348 -> m (Versioned NgramsTablePatch)
349 tableNgramsPull listId ngramsType p_version = do
350 printDebug "[tableNgramsPull]" (listId, ngramsType)
351 var <- getNodeStoryVar [listId]
352 r <- liftBase $ readMVar var
355 a = r ^. unNodeStory . at listId . _Just
356 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
357 q_table = q ^. _PatchMap . at ngramsType . _Just
359 pure (Versioned (a ^. a_version) q_table)
364 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
365 -- Apply the given patch to the DB and returns the patch to be applied on the
368 tableNgramsPut :: ( HasNodeStory env err m
369 , HasInvalidError err
375 -> Versioned NgramsTablePatch
376 -> m (Versioned NgramsTablePatch)
377 tableNgramsPut tabType listId (Versioned p_version p_table)
378 | p_table == mempty = do
379 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
380 let ngramsType = ngramsTypeFromTabType tabType
381 tableNgramsPull listId ngramsType p_version
384 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
385 let ngramsType = ngramsTypeFromTabType tabType
386 (p, p_validity) = PM.singleton ngramsType p_table
388 assertValid p_validity
390 ret <- commitStatePatch listId (Versioned p_version p)
391 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
397 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
402 => UpdateTableNgramsCharts
405 tableNgramsPostChartsAsync utn logStatus = do
406 let tabType = utn ^. utn_tab_type
407 let listId = utn ^. utn_list_id
409 node <- getNode listId
410 let nId = node ^. node_id
411 _uId = node ^. node_user_id
412 mCId = node ^. node_parent_id
414 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
415 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
419 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
420 pure $ jobLogFail $ jobLogInit 1
424 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
425 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
427 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
432 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
433 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
434 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
436 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
437 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
439 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
440 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
442 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
447 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
448 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
450 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
455 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
456 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
459 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
461 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
463 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
465 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
467 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
469 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
475 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
476 pure $ jobLogFail $ jobLogInit 1
479 { _ne_list :: ListType
480 If we merge the parents/children we can potentially create cycles!
481 , _ne_parent :: Maybe NgramsTerm
482 , _ne_children :: MSet NgramsTerm
486 getNgramsTableMap :: HasNodeStory env err m
488 -> TableNgrams.NgramsType
489 -> m (Versioned NgramsTableMap)
490 getNgramsTableMap nodeId ngramsType = do
491 v <- getNodeStoryVar [nodeId]
492 repo <- liftBase $ readMVar v
493 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
494 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
497 dumpJsonTableMap :: HasNodeStory env err m
500 -> TableNgrams.NgramsType
502 dumpJsonTableMap fpath nodeId ngramsType = do
503 m <- getNgramsTableMap nodeId ngramsType
504 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
511 -- | TODO Errors management
512 -- TODO: polymorphic for Annuaire or Corpus or ...
513 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
514 -- TODO: should take only one ListId
517 getTableNgrams :: forall env err m.
518 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
519 => NodeType -> NodeId -> TabType
520 -> ListId -> Limit -> Maybe Offset
522 -> Maybe MinSize -> Maybe MaxSize
524 -> (NgramsTerm -> Bool)
525 -> m (VersionedWithCount NgramsTable)
526 getTableNgrams _nType nId tabType listId limit_ offset
527 listType minSize maxSize orderBy searchQuery = do
530 -- lIds <- selectNodesWithUsername NodeList userMaster
532 ngramsType = ngramsTypeFromTabType tabType
533 offset' = maybe 0 identity offset
534 listType' = maybe (const True) (==) listType
535 minSize' = maybe (const True) (<=) minSize
536 maxSize' = maybe (const True) (>=) maxSize
538 rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
543 selected_node n = minSize' s
545 && searchQuery (n ^. ne_ngrams)
546 && listType' (n ^. ne_list)
550 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
552 ---------------------------------------
553 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
554 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
555 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
556 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to length)
557 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to length)
559 ---------------------------------------
560 -- | Filter the given `tableMap` with the search criteria.
561 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
562 filteredNodes tableMap = roots
564 list = tableMap ^.. each
565 selected_nodes = list & filter selected_node
566 roots = rootOf tableMap <$> selected_nodes
568 -- | Appends subitems (selected from `tableMap`) for given `roots`.
569 withInners :: Map NgramsTerm NgramsElement -> [NgramsElement] -> [NgramsElement]
570 withInners tableMap roots = roots <> inners
572 list = tableMap ^.. each
573 rootSet = Set.fromList (_ne_ngrams <$> roots)
574 inners = list & filter (selected_inner rootSet)
576 -- | Paginate the results
577 sortAndPaginate :: [NgramsElement] -> [NgramsElement]
578 sortAndPaginate = take limit_
580 . sortOnOrder orderBy
582 ---------------------------------------
584 let scoresNeeded = needsScores orderBy
587 tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
589 let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
591 let fltrCount = length $ fltr ^. v_data . _NgramsTable
594 let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
596 --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
599 ("getTableNgrams total=" % hasTime
603 % " sql=" % (if scoresNeeded then "map2" else "map3")
605 ) t0 t3 t0 t1 t1 t2 t2 t3
607 -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
608 pure $ toVersionedWithCount fltrCount tableMapSorted
611 -- | Helper function to get the ngrams table with scores.
612 getNgramsTable' :: forall env err m.
613 ( HasNodeStory env err m
615 , HasConnectionPool env
620 -> TableNgrams.NgramsType
621 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
622 getNgramsTable' nId listId ngramsType = do
623 tableMap <- getNgramsTableMap listId ngramsType
624 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
625 . Map.mapWithKey ngramsElementFromRepo
627 -- | Helper function to set scores on an `NgramsTable`.
628 setNgramsTableScores :: forall env err m t.
629 ( Each t t NgramsElement NgramsElement
630 , HasNodeStory env err m
632 , HasConnectionPool env
637 -> TableNgrams.NgramsType
640 setNgramsTableScores nId listId ngramsType table = do
642 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
643 --printDebug "[setNgramsTableScores] occurrences" occurrences
646 let ngrams_terms = table ^.. each . ne_ngrams
647 -- printDebug "ngrams_terms" ngrams_terms
649 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
650 (length ngrams_terms) t1 t2
652 setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
654 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
656 pure $ table & each %~ setOcc
661 scoresRecomputeTableNgrams :: forall env err m.
662 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
663 => NodeId -> TabType -> ListId -> m Int
664 scoresRecomputeTableNgrams nId tabType listId = do
665 tableMap <- getNgramsTableMap listId ngramsType
666 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
667 . Map.mapWithKey ngramsElementFromRepo
671 ngramsType = ngramsTypeFromTabType tabType
676 -- TODO: find a better place for the code above, All APIs stay here
678 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
679 deriving (Generic, Enum, Bounded, Read, Show)
681 instance FromHttpApiData OrderBy
683 parseUrlPiece "TermAsc" = pure TermAsc
684 parseUrlPiece "TermDesc" = pure TermDesc
685 parseUrlPiece "ScoreAsc" = pure ScoreAsc
686 parseUrlPiece "ScoreDesc" = pure ScoreDesc
687 parseUrlPiece _ = Left "Unexpected value of OrderBy"
689 instance ToHttpApiData OrderBy where
690 toUrlPiece = pack . show
692 instance ToParamSchema OrderBy
693 instance FromJSON OrderBy
694 instance ToJSON OrderBy
695 instance ToSchema OrderBy
696 instance Arbitrary OrderBy
698 arbitrary = elements [minBound..maxBound]
700 needsScores :: Maybe OrderBy -> Bool
701 needsScores (Just ScoreAsc) = True
702 needsScores (Just ScoreDesc) = True
703 needsScores _ = False
705 type TableNgramsApiGet = Summary " Table Ngrams API Get"
706 :> QueryParamR "ngramsType" TabType
707 :> QueryParamR "list" ListId
708 :> QueryParamR "limit" Limit
709 :> QueryParam "offset" Offset
710 :> QueryParam "listType" ListType
711 :> QueryParam "minTermSize" MinSize
712 :> QueryParam "maxTermSize" MaxSize
713 :> QueryParam "orderBy" OrderBy
714 :> QueryParam "search" Text
715 :> Get '[JSON] (VersionedWithCount NgramsTable)
717 type TableNgramsApiPut = Summary " Table Ngrams API Change"
718 :> QueryParamR "ngramsType" TabType
719 :> QueryParamR "list" ListId
720 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
721 :> Put '[JSON] (Versioned NgramsTablePatch)
723 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
724 :> QueryParamR "ngramsType" TabType
725 :> QueryParamR "list" ListId
726 :> "recompute" :> Post '[JSON] Int
728 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
729 :> QueryParamR "ngramsType" TabType
730 :> QueryParamR "list" ListId
731 :> Get '[JSON] Version
733 type TableNgramsApi = TableNgramsApiGet
734 :<|> TableNgramsApiPut
735 :<|> RecomputeScoresNgramsApiGet
736 :<|> "version" :> TableNgramsApiGetVersion
737 :<|> TableNgramsAsyncApi
739 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
743 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
745 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
752 -> Maybe MinSize -> Maybe MaxSize
754 -> Maybe Text -- full text search
755 -> m (VersionedWithCount NgramsTable)
756 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
757 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
759 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
763 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
768 getTableNgramsVersion _nId _tabType listId = currentVersion listId
773 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
774 -- This line above looks like a waste of computation to finally get only the version.
775 -- See the comment about listNgramsChangedSince.
778 -- | Text search is deactivated for now for ngrams by doc only
779 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
781 -> ListId -> Limit -> Maybe Offset
783 -> Maybe MinSize -> Maybe MaxSize
785 -> Maybe Text -- full text search
786 -> m (VersionedWithCount NgramsTable)
787 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
788 ns <- selectNodesWithUsername NodeList userMaster
789 let ngramsType = ngramsTypeFromTabType tabType
790 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
791 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
792 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
796 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
797 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
799 :<|> scoresRecomputeTableNgrams cId
800 :<|> getTableNgramsVersion cId
801 :<|> apiNgramsAsync cId
803 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
804 apiNgramsTableDoc dId = getTableNgramsDoc dId
806 :<|> scoresRecomputeTableNgrams dId
807 :<|> getTableNgramsVersion dId
808 :<|> apiNgramsAsync dId
810 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
811 apiNgramsAsync _dId =
812 serveJobsAPI TableNgramsJob $ \i log ->
815 printDebug "tableNgramsPostChartsAsync" x
817 in tableNgramsPostChartsAsync i log'
819 -- Did the given list of ngrams changed since the given version?
820 -- The returned value is versioned boolean value, meaning that one always retrieve the
822 -- If the given version is negative then one simply receive the latest version and True.
823 -- Using this function is more precise than simply comparing the latest version number
824 -- with the local version number. Indeed there might be no change to this particular list
825 -- and still the version number has changed because of other lists.
827 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
828 -- * currentVersion: good computation, good bandwidth, bad precision.
829 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
830 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
831 listNgramsChangedSince :: HasNodeStory env err m
832 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
833 listNgramsChangedSince listId ngramsType version
835 Versioned <$> currentVersion listId <*> pure True
837 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)