2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
32 , getTableNgramsCorpus
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
56 , NgramsRepoElement(..)
58 , saveNodeStoryImmediate
73 , setNgramsTableScores
77 , VersionedWithCount(..)
79 , listNgramsChangedSince
80 , MinSize, MaxSize, OrderBy, NgramsTable
81 , UpdateTableNgramsCharts
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import Data.Either (Either(..))
91 import Data.Map.Strict (Map)
92 import Data.Maybe (fromMaybe)
94 import Data.Ord (Down(..))
95 import Data.Patch.Class (Action(act), Transformable(..), ours)
96 import Data.Swagger hiding (version, patch)
97 import Data.Text (Text, isInfixOf, toLower, unpack, pack)
98 import Data.Text.Lazy.IO as DTL
99 import Formatting (hprint, int, (%))
100 import GHC.Generics (Generic)
101 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
102 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
103 import Gargantext.API.Admin.Types (HasSettings)
104 import Gargantext.API.Ngrams.Types
105 import Gargantext.API.Prelude
106 import Gargantext.Core.NodeStory
107 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasInvalidError)
108 import Gargantext.Core.Types.Query (Limit(..), Offset(..))
109 import Gargantext.API.Ngrams.Tools
110 import Gargantext.Database.Action.Flow.Types
111 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
112 import Gargantext.Database.Admin.Config (userMaster)
113 import Gargantext.Database.Admin.Types.Node (NodeType(..))
114 import Gargantext.Database.Prelude (CmdCommon)
115 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
116 import Gargantext.Database.Query.Table.Node (getNode)
117 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
118 import Gargantext.Database.Query.Table.Node.Select
119 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
120 import Gargantext.Prelude hiding (log)
121 import Gargantext.Prelude.Clock (hasTime, getTime)
122 import Prelude (error)
123 import Servant hiding (Patch)
124 import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
125 import System.IO (stderr)
126 import Test.QuickCheck (elements)
127 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
128 import qualified Data.Aeson.Text as DAT
129 import qualified Data.List as List
130 import qualified Data.Map.Strict as Map
131 import qualified Data.Map.Strict.Patch as PM
132 import qualified Data.Set as S
133 import qualified Data.Set as Set
134 import qualified Gargantext.API.Metrics as Metrics
135 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
138 -- TODO sequences of modifications (Patchs)
139 type NgramsIdPatch = Patch NgramsId NgramsPatch
141 ngramsPatch :: Int -> NgramsPatch
142 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
144 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
145 toEdit n p = Edit n p
146 ngramsIdPatch :: Patch NgramsId NgramsPatch
147 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
148 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
149 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
152 -- applyPatchBack :: Patch -> IO Patch
153 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
155 ------------------------------------------------------------------------
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
160 -- TODO: Replace.old is ignored which means that if the current list
161 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
162 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
163 -- However this should not happen in non conflicting situations.
164 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
165 mkListsUpdate nt patches =
166 [ (ngramsTypeId nt, ng, listTypeId lt)
167 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
168 , lt <- patch ^.. patch_list . new
171 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
174 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
175 mkChildrenGroups addOrRem nt patches =
176 [ (ngramsTypeId nt, parent, child)
177 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
178 , child <- patch ^.. patch_children . to addOrRem . folded
182 ------------------------------------------------------------------------
184 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
187 saver <- view hasNodeStorySaver
189 --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
191 --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
194 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
196 saveNodeStoryImmediate = do
197 saver <- view hasNodeStoryImmediateSaver
199 --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
201 --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
203 listTypeConflictResolution :: ListType -> ListType -> ListType
204 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
207 ngramsStatePatchConflictResolution
208 :: TableNgrams.NgramsType
210 -> ConflictResolutionNgramsPatch
211 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
212 = (ours, (const ours, ours), (False, False))
213 -- (False, False) mean here that Mod has always priority.
214 -- = (ours, (const ours, ours), (True, False))
215 -- (True, False) <- would mean priority to the left (same as ours).
216 -- undefined {- TODO think this through -}, listTypeConflictResolution)
222 -- Insertions are not considered as patches,
223 -- they do not extend history,
224 -- they do not bump version.
225 insertNewOnly :: a -> Maybe b -> a
226 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
227 -- TODO error handling
230 -- TODO refactor with putListNgrams
231 copyListNgrams :: RepoCmdM env err m
232 => NodeId -> NodeId -> NgramsType
234 copyListNgrams srcListId dstListId ngramsType = do
236 liftBase $ modifyMVar_ var $
237 pure . (r_state . at ngramsType %~ (Just . f . something))
240 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
241 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
243 -- TODO refactor with putListNgrams
244 -- The list must be non-empty!
245 -- The added ngrams must be non-existent!
246 addListNgrams :: RepoCmdM env err m
247 => NodeId -> NgramsType
248 -> [NgramsElement] -> m ()
249 addListNgrams listId ngramsType nes = do
251 liftBase $ modifyMVar_ var $
252 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
255 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
258 -- | TODO: incr the Version number
259 -- && should use patch
262 setListNgrams :: HasNodeStory env err m
264 -> TableNgrams.NgramsType
265 -> Map NgramsTerm NgramsRepoElement
267 setListNgrams listId ngramsType ns = do
268 -- printDebug "[setListNgrams]" (listId, ngramsType)
269 getter <- view hasNodeStory
270 var <- liftBase $ (getter ^. nse_getter) [listId]
271 liftBase $ modifyMVar_ var $
281 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
282 newNgramsFromNgramsStatePatch p =
283 [ text2ngrams (unNgramsTerm n)
284 | (n,np) <- p ^.. _PatchMap
285 -- . each . _PatchMap
286 . each . _NgramsTablePatch
287 . _PatchMap . ifolded . withIndex
288 , _ <- np ^.. patch_new . _Just
294 commitStatePatch :: ( HasNodeStory env err m
295 , HasNodeStoryImmediateSaver env
296 , HasNodeArchiveStoryImmediateSaver env
299 -> Versioned NgramsStatePatch'
300 -> m (Versioned NgramsStatePatch')
301 commitStatePatch listId (Versioned _p_version p) = do
302 -- printDebug "[commitStatePatch]" listId
303 var <- getNodeStoryVar [listId]
304 archiveSaver <- view hasNodeArchiveStoryImmediateSaver
305 vq' <- liftBase $ modifyMVar var $ \ns -> do
307 a = ns ^. unNodeStory . at listId . _Just
308 -- apply patches from version p_version to a ^. a_version
310 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
311 q = mconcat $ a ^. a_history
313 --printDebug "[commitStatePatch] transformWith" (p,q)
314 -- let tws s = case s of
317 -- printDebug "[commitStatePatch] transformWith" (tws $ p ^. _NgramsPatch, tws $ q ^. _NgramsPatch)
320 (p', q') = transformWith ngramsStatePatchConflictResolution p q
321 a' = a & a_version +~ 1
323 & a_history %~ (p' :)
326 -- Ideally we would like to check these properties. However:
327 -- * They should be checked only to debug the code. The client data
328 -- should be able to trigger these.
329 -- * What kind of error should they throw (we are in IO here)?
330 -- * Should we keep modifyMVar?
331 -- * Should we throw the validation in an Exception, catch it around
332 -- modifyMVar and throw it back as an Error?
333 assertValid $ transformable p q
334 assertValid $ applicable p' (r ^. r_state)
336 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
337 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
338 let newNs = ( ns & unNodeStory . at listId .~ (Just a')
339 , Versioned (a' ^. a_version) q'
342 -- NOTE Now is the only good time to save the archive history. We
343 -- have the handle to the MVar and we need to save its exact
344 -- snapshot. Node Story archive is a linear table, so it's only
345 -- couple of inserts, it shouldn't take long...
347 -- If we postponed saving the archive to the debounce action, we
348 -- would have issues like
349 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
350 -- where the `q` computation from above (which uses the archive)
351 -- would cause incorrect patch application (before the previous
352 -- archive was saved and applied)
353 newNs' <- archiveSaver $ fst newNs
355 pure (newNs', snd newNs)
357 -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
359 --saveNodeStoryImmediate
361 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
367 -- This is a special case of tableNgramsPut where the input patch is empty.
368 tableNgramsPull :: HasNodeStory env err m
370 -> TableNgrams.NgramsType
372 -> m (Versioned NgramsTablePatch)
373 tableNgramsPull listId ngramsType p_version = do
374 -- printDebug "[tableNgramsPull]" (listId, ngramsType)
375 var <- getNodeStoryVar [listId]
376 r <- liftBase $ readMVar var
379 a = r ^. unNodeStory . at listId . _Just
380 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
381 q_table = q ^. _PatchMap . at ngramsType . _Just
383 pure (Versioned (a ^. a_version) q_table)
388 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
389 -- Apply the given patch to the DB and returns the patch to be applied on the
392 tableNgramsPut :: ( HasNodeStory env err m
393 , HasNodeStoryImmediateSaver env
394 , HasNodeArchiveStoryImmediateSaver env
395 , HasInvalidError err
400 -> Versioned NgramsTablePatch
401 -> m (Versioned NgramsTablePatch)
402 tableNgramsPut tabType listId (Versioned p_version p_table)
403 | p_table == mempty = do
404 -- printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
405 let ngramsType = ngramsTypeFromTabType tabType
406 tableNgramsPull listId ngramsType p_version
409 -- printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
410 let ngramsType = ngramsTypeFromTabType tabType
411 (p, p_validity) = PM.singleton ngramsType p_table
413 assertValid p_validity
415 ret <- commitStatePatch listId (Versioned p_version p)
416 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
422 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
428 => UpdateTableNgramsCharts
431 tableNgramsPostChartsAsync utn jobHandle = do
432 let tabType = utn ^. utn_tab_type
433 let listId = utn ^. utn_list_id
435 node <- getNode listId
436 let _nId = node ^. node_id
437 _uId = node ^. node_user_id
438 mCId = node ^. node_parent_id
440 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
441 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
445 -- printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
446 markStarted 1 jobHandle
447 markFailed Nothing jobHandle
451 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
452 markStarted 1 jobHandle
453 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
454 markComplete jobHandle
456 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
457 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
458 markStarted 3 jobHandle
459 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
460 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
461 markProgress 1 jobHandle
462 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
463 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
464 markProgress 1 jobHandle
465 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
466 markComplete jobHandle
468 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
469 markStarted 1 jobHandle
470 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
471 markComplete jobHandle
473 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
474 markStarted 6 jobHandle
476 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
478 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
480 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
482 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
484 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
486 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
488 markComplete jobHandle
490 -- printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
491 markStarted 1 jobHandle
492 markFailed Nothing jobHandle
495 { _ne_list :: ListType
496 If we merge the parents/children we can potentially create cycles!
497 , _ne_parent :: Maybe NgramsTerm
498 , _ne_children :: MSet NgramsTerm
502 getNgramsTableMap :: HasNodeStory env err m
504 -> TableNgrams.NgramsType
505 -> m (Versioned NgramsTableMap)
506 getNgramsTableMap nodeId ngramsType = do
507 v <- getNodeStoryVar [nodeId]
508 repo <- liftBase $ readMVar v
509 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
510 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
513 dumpJsonTableMap :: HasNodeStory env err m
516 -> TableNgrams.NgramsType
518 dumpJsonTableMap fpath nodeId ngramsType = do
519 m <- getNgramsTableMap nodeId ngramsType
520 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
527 -- | TODO Errors management
528 -- TODO: polymorphic for Annuaire or Corpus or ...
529 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
530 -- TODO: should take only one ListId
533 getTableNgrams :: forall env err m.
534 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
535 => NodeType -> NodeId -> TabType
536 -> ListId -> Limit -> Maybe Offset
538 -> Maybe MinSize -> Maybe MaxSize
540 -> (NgramsTerm -> Bool)
541 -> m (VersionedWithCount NgramsTable)
542 getTableNgrams _nType nId tabType listId limit_ offset
543 listType minSize maxSize orderBy searchQuery = do
546 -- lIds <- selectNodesWithUsername NodeList userMaster
548 ngramsType = ngramsTypeFromTabType tabType
549 offset' = getOffset $ maybe 0 identity offset
550 listType' = maybe (const True) (==) listType
551 minSize' = maybe (const True) (<=) minSize
552 maxSize' = maybe (const True) (>=) maxSize
554 rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
559 selected_node n = minSize' s
561 && searchQuery (n ^. ne_ngrams)
562 && listType' (n ^. ne_list)
566 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
568 ---------------------------------------
569 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
570 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
571 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
572 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to List.nub . to length)
573 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to List.nub . to length)
575 ---------------------------------------
576 -- | Filter the given `tableMap` with the search criteria.
577 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
578 filteredNodes tableMap = roots
580 list = tableMap ^.. each
581 selected_nodes = list & filter selected_node
582 roots = rootOf tableMap <$> selected_nodes
584 -- | Appends subitems (selected from `tableMap`) for given `roots`.
585 withInners :: Map NgramsTerm NgramsElement -> [NgramsElement] -> [NgramsElement]
586 withInners tableMap roots = roots <> inners
588 list = tableMap ^.. each
589 rootSet = Set.fromList (_ne_ngrams <$> roots)
590 inners = list & filter (selected_inner rootSet)
592 -- | Paginate the results
593 sortAndPaginate :: [NgramsElement] -> [NgramsElement]
594 sortAndPaginate = take (getLimit limit_)
596 . sortOnOrder orderBy
598 ---------------------------------------
600 let scoresNeeded = needsScores orderBy
603 tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
605 let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
607 let fltrCount = length $ fltr ^. v_data . _NgramsTable
610 let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
612 --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
615 ("getTableNgrams total=" % hasTime
619 % " sql=" % (if scoresNeeded then "map2" else "map3")
621 ) t0 t3 t0 t1 t1 t2 t2 t3
623 -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
624 pure $ toVersionedWithCount fltrCount tableMapSorted
627 -- | Helper function to get the ngrams table with scores.
628 getNgramsTable' :: forall env err m.
629 ( HasNodeStory env err m
634 -> TableNgrams.NgramsType
635 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
636 getNgramsTable' nId listId ngramsType = do
637 tableMap <- getNgramsTableMap listId ngramsType
638 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
639 . Map.mapWithKey ngramsElementFromRepo
641 -- | Helper function to set scores on an `NgramsTable`.
642 setNgramsTableScores :: forall env err m t.
643 ( Each t t NgramsElement NgramsElement
644 , HasNodeStory env err m
649 -> TableNgrams.NgramsType
652 setNgramsTableScores nId listId ngramsType table = do
654 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
655 --printDebug "[setNgramsTableScores] occurrences" occurrences
658 let ngrams_terms = table ^.. each . ne_ngrams
659 -- printDebug "ngrams_terms" ngrams_terms
661 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
662 (length ngrams_terms) t1 t2
664 setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
666 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
668 pure $ table & each %~ setOcc
673 scoresRecomputeTableNgrams :: forall env err m.
674 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
675 => NodeId -> TabType -> ListId -> m Int
676 scoresRecomputeTableNgrams nId tabType listId = do
677 tableMap <- getNgramsTableMap listId ngramsType
678 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
679 . Map.mapWithKey ngramsElementFromRepo
683 ngramsType = ngramsTypeFromTabType tabType
688 -- TODO: find a better place for the code above, All APIs stay here
690 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
691 deriving (Generic, Enum, Bounded, Read, Show)
693 instance FromHttpApiData OrderBy
695 parseUrlPiece "TermAsc" = pure TermAsc
696 parseUrlPiece "TermDesc" = pure TermDesc
697 parseUrlPiece "ScoreAsc" = pure ScoreAsc
698 parseUrlPiece "ScoreDesc" = pure ScoreDesc
699 parseUrlPiece _ = Left "Unexpected value of OrderBy"
701 instance ToHttpApiData OrderBy where
702 toUrlPiece = pack . show
704 instance ToParamSchema OrderBy
705 instance FromJSON OrderBy
706 instance ToJSON OrderBy
707 instance ToSchema OrderBy
708 instance Arbitrary OrderBy
710 arbitrary = elements [minBound..maxBound]
712 needsScores :: Maybe OrderBy -> Bool
713 needsScores (Just ScoreAsc) = True
714 needsScores (Just ScoreDesc) = True
715 needsScores _ = False
717 type TableNgramsApiGet = Summary " Table Ngrams API Get"
718 :> QueryParamR "ngramsType" TabType
719 :> QueryParamR "list" ListId
720 :> QueryParamR "limit" Limit
721 :> QueryParam "offset" Offset
722 :> QueryParam "listType" ListType
723 :> QueryParam "minTermSize" MinSize
724 :> QueryParam "maxTermSize" MaxSize
725 :> QueryParam "orderBy" OrderBy
726 :> QueryParam "search" Text
727 :> Get '[JSON] (VersionedWithCount NgramsTable)
729 type TableNgramsApiPut = Summary " Table Ngrams API Change"
730 :> QueryParamR "ngramsType" TabType
731 :> QueryParamR "list" ListId
732 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
733 :> Put '[JSON] (Versioned NgramsTablePatch)
735 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
736 :> QueryParamR "ngramsType" TabType
737 :> QueryParamR "list" ListId
738 :> "recompute" :> Post '[JSON] Int
740 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
741 :> QueryParamR "ngramsType" TabType
742 :> QueryParamR "list" ListId
743 :> Get '[JSON] Version
745 type TableNgramsApi = TableNgramsApiGet
746 :<|> TableNgramsApiPut
747 :<|> RecomputeScoresNgramsApiGet
748 :<|> "version" :> TableNgramsApiGetVersion
749 :<|> TableNgramsAsyncApi
751 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
755 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
757 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
764 -> Maybe MinSize -> Maybe MaxSize
766 -> Maybe Text -- full text search
767 -> m (VersionedWithCount NgramsTable)
768 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
769 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
771 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
775 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
780 getTableNgramsVersion _nId _tabType listId = currentVersion listId
785 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
786 -- This line above looks like a waste of computation to finally get only the version.
787 -- See the comment about listNgramsChangedSince.
790 -- | Text search is deactivated for now for ngrams by doc only
791 getTableNgramsDoc :: ( HasNodeStory env err m, HasNodeError err, CmdCommon env)
793 -> ListId -> Limit -> Maybe Offset
795 -> Maybe MinSize -> Maybe MaxSize
797 -> Maybe Text -- full text search
798 -> m (VersionedWithCount NgramsTable)
799 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
800 ns <- selectNodesWithUsername NodeList userMaster
801 let ngramsType = ngramsTypeFromTabType tabType
802 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
803 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
804 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
808 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
809 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
811 :<|> scoresRecomputeTableNgrams cId
812 :<|> getTableNgramsVersion cId
813 :<|> apiNgramsAsync cId
815 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
816 apiNgramsTableDoc dId = getTableNgramsDoc dId
818 :<|> scoresRecomputeTableNgrams dId
819 :<|> getTableNgramsVersion dId
820 :<|> apiNgramsAsync dId
822 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
823 apiNgramsAsync _dId =
824 serveJobsAPI TableNgramsJob $ \jHandle i -> withTracer (printDebug "tableNgramsPostChartsAsync") jHandle $
825 \jHandle' -> tableNgramsPostChartsAsync i jHandle'
827 -- Did the given list of ngrams changed since the given version?
828 -- The returned value is versioned boolean value, meaning that one always retrieve the
830 -- If the given version is negative then one simply receive the latest version and True.
831 -- Using this function is more precise than simply comparing the latest version number
832 -- with the local version number. Indeed there might be no change to this particular list
833 -- and still the version number has changed because of other lists.
835 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
836 -- * currentVersion: good computation, good bandwidth, bad precision.
837 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
838 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
839 listNgramsChangedSince :: HasNodeStory env err m
840 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
841 listNgramsChangedSince listId ngramsType version
843 Versioned <$> currentVersion listId <*> pure True
845 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)