2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
33 , getTableNgramsCorpus
35 --, rmListNgrams TODO fix before exporting
36 , apiNgramsTableCorpus
57 , NgramsRepoElement(..)
59 , saveNodeStoryImmediate
74 , setNgramsTableScores
78 , VersionedWithCount(..)
80 , listNgramsChangedSince
81 , MinSize, MaxSize, OrderBy, NgramsTable
82 , UpdateTableNgramsCharts
86 import Control.Concurrent
87 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
88 import Control.Monad.Reader
90 import Data.Map.Strict (Map)
91 import Data.Maybe (fromMaybe)
93 import Data.Ord (Down(..))
94 import Data.Patch.Class (Action(act), Transformable(..), ours)
96 import Data.Text (Text, isInfixOf, toLower, unpack)
97 import Data.Text.Lazy.IO as DTL
98 import Formatting (hprint, int, (%))
99 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
100 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
101 import Gargantext.API.Admin.Types (HasSettings)
102 import Gargantext.API.Ngrams.Types
103 import Gargantext.API.Prelude
104 import Gargantext.Core.NodeStory
105 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasInvalidError, ContextId)
106 import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
107 import Gargantext.API.Ngrams.Tools
108 import Gargantext.Database.Action.Flow.Types
109 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
110 import Gargantext.Database.Admin.Config (userMaster)
111 import Gargantext.Database.Admin.Types.Node (NodeType(..))
112 import Gargantext.Database.Prelude (CmdCommon)
113 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
114 import Gargantext.Database.Query.Table.Node (getNode)
115 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
116 import Gargantext.Database.Query.Table.Node.Select
117 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
118 import Gargantext.Prelude hiding (log)
119 import Gargantext.Prelude.Clock (hasTime, getTime)
120 import Prelude (error)
121 import Servant hiding (Patch)
122 import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
123 import System.IO (stderr)
124 import qualified Data.Aeson.Text as DAT
125 import qualified Data.List as List
126 import qualified Data.Map.Strict as Map
127 import qualified Data.Map.Strict.Patch as PM
128 import qualified Data.Set as Set
129 import qualified Gargantext.API.Metrics as Metrics
130 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
133 -- TODO sequences of modifications (Patchs)
134 type NgramsIdPatch = Patch NgramsId NgramsPatch
136 ngramsPatch :: Int -> NgramsPatch
137 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
139 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
140 toEdit n p = Edit n p
141 ngramsIdPatch :: Patch NgramsId NgramsPatch
142 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
143 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
144 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
147 -- applyPatchBack :: Patch -> IO Patch
148 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
150 ------------------------------------------------------------------------
151 ------------------------------------------------------------------------
152 ------------------------------------------------------------------------
155 -- TODO: Replace.old is ignored which means that if the current list
156 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
157 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
158 -- However this should not happen in non conflicting situations.
159 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
160 mkListsUpdate nt patches =
161 [ (ngramsTypeId nt, ng, listTypeId lt)
162 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
163 , lt <- patch ^.. patch_list . new
166 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
169 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
170 mkChildrenGroups addOrRem nt patches =
171 [ (ngramsTypeId nt, parent, child)
172 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
173 , child <- patch ^.. patch_children . to addOrRem . folded
177 ------------------------------------------------------------------------
179 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
182 saver <- view hasNodeStorySaver
184 --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
186 --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
189 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
191 saveNodeStoryImmediate = do
192 saver <- view hasNodeStoryImmediateSaver
194 --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
196 --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
198 listTypeConflictResolution :: ListType -> ListType -> ListType
199 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
202 ngramsStatePatchConflictResolution
203 :: TableNgrams.NgramsType
205 -> ConflictResolutionNgramsPatch
206 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
207 = (ours, (const ours, ours), (False, False))
208 -- (False, False) mean here that Mod has always priority.
209 -- = (ours, (const ours, ours), (True, False))
210 -- (True, False) <- would mean priority to the left (same as ours).
211 -- undefined {- TODO think this through -}, listTypeConflictResolution)
217 -- Insertions are not considered as patches,
218 -- they do not extend history,
219 -- they do not bump version.
220 insertNewOnly :: a -> Maybe b -> a
221 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
222 -- TODO error handling
225 -- TODO refactor with putListNgrams
226 copyListNgrams :: RepoCmdM env err m
227 => NodeId -> NodeId -> NgramsType
229 copyListNgrams srcListId dstListId ngramsType = do
231 liftBase $ modifyMVar_ var $
232 pure . (r_state . at ngramsType %~ (Just . f . something))
235 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
236 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
238 -- TODO refactor with putListNgrams
239 -- The list must be non-empty!
240 -- The added ngrams must be non-existent!
241 addListNgrams :: RepoCmdM env err m
242 => NodeId -> NgramsType
243 -> [NgramsElement] -> m ()
244 addListNgrams listId ngramsType nes = do
246 liftBase $ modifyMVar_ var $
247 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
250 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
253 -- | TODO: incr the Version number
254 -- && should use patch
257 setListNgrams :: HasNodeStory env err m
259 -> TableNgrams.NgramsType
260 -> Map NgramsTerm NgramsRepoElement
262 setListNgrams listId ngramsType ns = do
263 -- printDebug "[setListNgrams]" (listId, ngramsType)
264 getter <- view hasNodeStory
265 var <- liftBase $ (getter ^. nse_getter) [listId]
266 liftBase $ modifyMVar_ var $
276 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
277 newNgramsFromNgramsStatePatch p =
278 [ text2ngrams (unNgramsTerm n)
279 | (n,np) <- p ^.. _PatchMap
280 -- . each . _PatchMap
281 . each . _NgramsTablePatch
282 . _PatchMap . ifolded . withIndex
283 , _ <- np ^.. patch_new . _Just
289 commitStatePatch :: ( HasNodeStory env err m
290 , HasNodeStoryImmediateSaver env
291 , HasNodeArchiveStoryImmediateSaver env
294 -> Versioned NgramsStatePatch'
295 -> m (Versioned NgramsStatePatch')
296 commitStatePatch listId (Versioned _p_version p) = do
297 -- printDebug "[commitStatePatch]" listId
298 var <- getNodeStoryVar [listId]
299 archiveSaver <- view hasNodeArchiveStoryImmediateSaver
300 vq' <- liftBase $ modifyMVar var $ \ns -> do
302 a = ns ^. unNodeStory . at listId . _Just
303 -- apply patches from version p_version to a ^. a_version
305 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
306 q = mconcat $ a ^. a_history
308 --printDebug "[commitStatePatch] transformWith" (p,q)
309 -- let tws s = case s of
312 -- printDebug "[commitStatePatch] transformWith" (tws $ p ^. _NgramsPatch, tws $ q ^. _NgramsPatch)
315 (p', q') = transformWith ngramsStatePatchConflictResolution p q
316 a' = a & a_version +~ 1
318 & a_history %~ (p' :)
321 -- Ideally we would like to check these properties. However:
322 -- * They should be checked only to debug the code. The client data
323 -- should be able to trigger these.
324 -- * What kind of error should they throw (we are in IO here)?
325 -- * Should we keep modifyMVar?
326 -- * Should we throw the validation in an Exception, catch it around
327 -- modifyMVar and throw it back as an Error?
328 assertValid $ transformable p q
329 assertValid $ applicable p' (r ^. r_state)
331 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
332 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
333 let newNs = ( ns & unNodeStory . at listId .~ (Just a')
334 , Versioned (a' ^. a_version) q'
337 -- NOTE Now is the only good time to save the archive history. We
338 -- have the handle to the MVar and we need to save its exact
339 -- snapshot. Node Story archive is a linear table, so it's only
340 -- couple of inserts, it shouldn't take long...
342 -- If we postponed saving the archive to the debounce action, we
343 -- would have issues like
344 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
345 -- where the `q` computation from above (which uses the archive)
346 -- would cause incorrect patch application (before the previous
347 -- archive was saved and applied)
348 newNs' <- archiveSaver $ fst newNs
350 pure (newNs', snd newNs)
353 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
354 -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
356 saveNodeStoryImmediate
362 -- This is a special case of tableNgramsPut where the input patch is empty.
363 tableNgramsPull :: HasNodeStory env err m
365 -> TableNgrams.NgramsType
367 -> m (Versioned NgramsTablePatch)
368 tableNgramsPull listId ngramsType p_version = do
369 -- printDebug "[tableNgramsPull]" (listId, ngramsType)
370 var <- getNodeStoryVar [listId]
371 r <- liftBase $ readMVar var
374 a = r ^. unNodeStory . at listId . _Just
375 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
376 q_table = q ^. _PatchMap . at ngramsType . _Just
378 pure (Versioned (a ^. a_version) q_table)
383 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
384 -- Apply the given patch to the DB and returns the patch to be applied on the
387 tableNgramsPut :: ( HasNodeStory env err m
388 , HasNodeStoryImmediateSaver env
389 , HasNodeArchiveStoryImmediateSaver env
390 , HasInvalidError err
395 -> Versioned NgramsTablePatch
396 -> m (Versioned NgramsTablePatch)
397 tableNgramsPut tabType listId (Versioned p_version p_table)
398 | p_table == mempty = do
399 -- printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
400 let ngramsType = ngramsTypeFromTabType tabType
401 tableNgramsPull listId ngramsType p_version
404 -- printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
405 let ngramsType = ngramsTypeFromTabType tabType
406 (p, p_validity) = PM.singleton ngramsType p_table
408 assertValid p_validity
410 ret <- commitStatePatch listId (Versioned p_version p)
411 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
417 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
423 => UpdateTableNgramsCharts
426 tableNgramsPostChartsAsync utn jobHandle = do
427 let tabType = utn ^. utn_tab_type
428 let listId = utn ^. utn_list_id
430 node <- getNode listId
431 let _nId = node ^. node_id
432 _uId = node ^. node_user_id
433 mCId = node ^. node_parent_id
435 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
436 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
440 -- printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
441 markStarted 1 jobHandle
442 markFailed Nothing jobHandle
446 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
447 markStarted 1 jobHandle
448 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
449 markComplete jobHandle
451 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
452 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
453 markStarted 3 jobHandle
454 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
455 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
456 markProgress 1 jobHandle
457 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
458 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
459 markProgress 1 jobHandle
460 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
461 markComplete jobHandle
463 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
464 markStarted 1 jobHandle
465 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
466 markComplete jobHandle
468 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
469 markStarted 6 jobHandle
471 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
473 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
475 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
477 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
479 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
481 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
483 markComplete jobHandle
485 -- printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
486 markStarted 1 jobHandle
487 markFailed Nothing jobHandle
490 { _ne_list :: ListType
491 If we merge the parents/children we can potentially create cycles!
492 , _ne_parent :: Maybe NgramsTerm
493 , _ne_children :: MSet NgramsTerm
497 getNgramsTableMap :: HasNodeStory env err m
499 -> TableNgrams.NgramsType
500 -> m (Versioned NgramsTableMap)
501 getNgramsTableMap nodeId ngramsType = do
502 v <- getNodeStoryVar [nodeId]
503 repo <- liftBase $ readMVar v
504 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
505 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
508 dumpJsonTableMap :: HasNodeStory env err m
511 -> TableNgrams.NgramsType
513 dumpJsonTableMap fpath nodeId ngramsType = do
514 m <- getNgramsTableMap nodeId ngramsType
515 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
519 -- | TODO Errors management
520 -- TODO: polymorphic for Annuaire or Corpus or ...
521 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
522 -- TODO: should take only one ListId
524 -- | /pure/ function to query a 'Map NgramsTerm NgramsElement', according to a
525 -- search function. Returns a /versioned/ 'NgramsTable' which is paginated and
526 -- sorted according to the input 'NgramsSearchQuery', together with the
527 -- occurrences of the elements.
528 searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
530 -- ^ The search query on the retrieved data
531 -> VersionedWithCount NgramsTable
532 searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
533 let tableMap = versionedTableMap ^. v_data
534 filteredData = filterNodes tableMap
535 tableMapSorted = versionedTableMap
536 & v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
538 in toVersionedWithCount (Set.size filteredData) tableMapSorted
541 -- | Returns the \"root\" of the 'NgramsElement', or it falls back to the input
542 -- 'NgramsElement' itself, if no root can be found.
543 -- /CAREFUL/: The root we select might /not/ have the same 'listType' we are
544 -- filtering for, in which case we have to change its type to match, if needed.
545 rootOf :: Map NgramsTerm NgramsElement -> NgramsElement -> NgramsElement
546 rootOf tblMap ne = case ne ^. ne_root of
549 | Just r <- tblMap ^. at rootKey
550 -- NOTE(adinapoli) It's unclear what is the correct behaviour here: should
551 -- we override the type or we filter out the node altogether?
552 -> over ne_list (\oldList -> fromMaybe oldList _nsq_listType) r
556 -- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria
557 -- mandated by 'NgramsSearchQuery'.
558 matchingNode :: NgramsElement -> Bool
559 matchingNode inputNode =
560 let nodeSize = inputNode ^. ne_size
561 matchesListType = maybe (const True) (==) _nsq_listType
562 respectsMinSize = maybe (const True) (<=) (getMinSize <$> _nsq_minSize)
563 respectsMaxSize = maybe (const True) (>=) (getMaxSize <$> _nsq_maxSize)
565 in respectsMinSize nodeSize
566 && respectsMaxSize nodeSize
567 && _nsq_searchQuery (inputNode ^. ne_ngrams)
568 && matchesListType (inputNode ^. ne_list)
570 sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
571 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
572 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
573 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
574 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
575 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
577 -- | Filters the given `tableMap` with the search criteria. It returns
578 -- a set of 'NgramsElement' all matching the input 'NGramsSearchQuery'.
579 filterNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
580 filterNodes tblMap = Set.map (rootOf tblMap) selectedNodes
582 allNodes = Set.fromList $ Map.elems tblMap
583 selectedNodes = Set.filter matchingNode allNodes
585 -- | For each input root, extends its occurrence count with
586 -- the information found in the subitems.
587 withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
588 withInners tblMap roots = Set.map addSubitemsOccurrences roots
590 addSubitemsOccurrences :: NgramsElement -> NgramsElement
591 addSubitemsOccurrences e =
592 e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) }
594 alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId
595 alterOccurrences occs t = case Map.lookup t tblMap of
597 Just e' -> occs <> e' ^. ne_occurrences
599 -- | Paginate the results
600 sortAndPaginate :: Set NgramsElement -> [NgramsElement]
602 let offset' = getOffset $ maybe 0 identity _nsq_offset
603 in take (getLimit _nsq_limit)
605 . sortOnOrder _nsq_orderBy
610 getTableNgrams :: forall env err m.
611 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
616 -> m (VersionedWithCount NgramsTable)
617 getTableNgrams nodeId listId tabType searchQuery = do
618 let ngramsType = ngramsTypeFromTabType tabType
619 versionedInput <- getNgramsTable' nodeId listId ngramsType
620 pure $ searchTableNgrams versionedInput searchQuery
623 -- | Helper function to get the ngrams table with scores.
624 getNgramsTable' :: forall env err m.
625 ( HasNodeStory env err m
630 -> TableNgrams.NgramsType
631 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
632 getNgramsTable' nId listId ngramsType = do
633 tableMap <- getNgramsTableMap listId ngramsType
634 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
635 . Map.mapWithKey ngramsElementFromRepo
637 -- | Helper function to set scores on an `NgramsTable`.
638 setNgramsTableScores :: forall env err m t.
639 ( Each t t NgramsElement NgramsElement
640 , HasNodeStory env err m
645 -> TableNgrams.NgramsType
648 setNgramsTableScores nId listId ngramsType table = do
650 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
651 --printDebug "[setNgramsTableScores] occurrences" occurrences
654 let ngrams_terms = table ^.. each . ne_ngrams
655 -- printDebug "ngrams_terms" ngrams_terms
657 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
658 (length ngrams_terms) t1 t2
660 setOcc ne = ne & ne_occurrences .~ Set.fromList (msumOf (at (ne ^. ne_ngrams) . _Just) occurrences)
662 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
664 pure $ table & each %~ setOcc
669 scoresRecomputeTableNgrams :: forall env err m.
670 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
671 => NodeId -> TabType -> ListId -> m Int
672 scoresRecomputeTableNgrams nId tabType listId = do
673 tableMap <- getNgramsTableMap listId ngramsType
674 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
675 . Map.mapWithKey ngramsElementFromRepo
679 ngramsType = ngramsTypeFromTabType tabType
684 -- TODO: find a better place for the code above, All APIs stay here
686 needsScores :: Maybe OrderBy -> Bool
687 needsScores (Just ScoreAsc) = True
688 needsScores (Just ScoreDesc) = True
689 needsScores _ = False
691 type TableNgramsApiGet = Summary " Table Ngrams API Get"
692 :> QueryParamR "ngramsType" TabType
693 :> QueryParamR "list" ListId
694 :> QueryParamR "limit" Limit
695 :> QueryParam "offset" Offset
696 :> QueryParam "listType" ListType
697 :> QueryParam "minTermSize" MinSize
698 :> QueryParam "maxTermSize" MaxSize
699 :> QueryParam "orderBy" OrderBy
700 :> QueryParam "search" Text
701 :> Get '[JSON] (VersionedWithCount NgramsTable)
703 type TableNgramsApiPut = Summary " Table Ngrams API Change"
704 :> QueryParamR "ngramsType" TabType
705 :> QueryParamR "list" ListId
706 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
707 :> Put '[JSON] (Versioned NgramsTablePatch)
709 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
710 :> QueryParamR "ngramsType" TabType
711 :> QueryParamR "list" ListId
712 :> "recompute" :> Post '[JSON] Int
714 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
715 :> QueryParamR "ngramsType" TabType
716 :> QueryParamR "list" ListId
717 :> Get '[JSON] Version
719 type TableNgramsApi = TableNgramsApiGet
720 :<|> TableNgramsApiPut
721 :<|> RecomputeScoresNgramsApiGet
722 :<|> "version" :> TableNgramsApiGetVersion
723 :<|> TableNgramsAsyncApi
725 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
729 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
731 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
738 -> Maybe MinSize -> Maybe MaxSize
740 -> Maybe Text -- full text search
741 -> m (VersionedWithCount NgramsTable)
742 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
743 getTableNgrams nId listId tabType searchQuery
745 searchQueryFn (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
746 searchQuery = NgramsSearchQuery {
748 , _nsq_offset = offset
749 , _nsq_listType = listType
750 , _nsq_minSize = minSize
751 , _nsq_maxSize = maxSize
752 , _nsq_orderBy = orderBy
753 , _nsq_searchQuery = searchQueryFn
759 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
764 getTableNgramsVersion _nId _tabType listId = currentVersion listId
769 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
770 -- This line above looks like a waste of computation to finally get only the version.
771 -- See the comment about listNgramsChangedSince.
774 -- | Text search is deactivated for now for ngrams by doc only
775 getTableNgramsDoc :: ( HasNodeStory env err m, HasNodeError err, CmdCommon env)
777 -> ListId -> Limit -> Maybe Offset
779 -> Maybe MinSize -> Maybe MaxSize
781 -> Maybe Text -- full text search
782 -> m (VersionedWithCount NgramsTable)
783 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
784 ns <- selectNodesWithUsername NodeList userMaster
785 let ngramsType = ngramsTypeFromTabType tabType
786 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
787 let searchQueryFn (NgramsTerm nt) = flip Set.member (Set.fromList ngs) nt
788 searchQuery = NgramsSearchQuery {
790 , _nsq_offset = offset
791 , _nsq_listType = listType
792 , _nsq_minSize = minSize
793 , _nsq_maxSize = maxSize
794 , _nsq_orderBy = orderBy
795 , _nsq_searchQuery = searchQueryFn
797 getTableNgrams dId listId tabType searchQuery
800 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
801 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
803 :<|> scoresRecomputeTableNgrams cId
804 :<|> getTableNgramsVersion cId
805 :<|> apiNgramsAsync cId
807 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
808 apiNgramsTableDoc dId = getTableNgramsDoc dId
810 :<|> scoresRecomputeTableNgrams dId
811 :<|> getTableNgramsVersion dId
812 :<|> apiNgramsAsync dId
814 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
815 apiNgramsAsync _dId =
816 serveJobsAPI TableNgramsJob $ \jHandle i -> withTracer (printDebug "tableNgramsPostChartsAsync") jHandle $
817 \jHandle' -> tableNgramsPostChartsAsync i jHandle'
819 -- Did the given list of ngrams changed since the given version?
820 -- The returned value is versioned boolean value, meaning that one always retrieve the
822 -- If the given version is negative then one simply receive the latest version and True.
823 -- Using this function is more precise than simply comparing the latest version number
824 -- with the local version number. Indeed there might be no change to this particular list
825 -- and still the version number has changed because of other lists.
827 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
828 -- * currentVersion: good computation, good bandwidth, bad precision.
829 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
830 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
831 listNgramsChangedSince :: HasNodeStory env err m
832 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
833 listNgramsChangedSince listId ngramsType version
835 Versioned <$> currentVersion listId <*> pure True
837 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)