2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
32 , getTableNgramsCorpus
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
56 , NgramsRepoElement(..)
58 , saveNodeStoryImmediate
73 , setNgramsTableScores
77 , VersionedWithCount(..)
79 , listNgramsChangedSince
80 , MinSize, MaxSize, OrderBy, NgramsTable
81 , UpdateTableNgramsCharts
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import Data.Either (Either(..))
91 import Data.Map.Strict (Map)
92 import Data.Maybe (fromMaybe)
94 import Data.Ord (Down(..))
95 import Data.Patch.Class (Action(act), Transformable(..), ours)
96 import Data.Swagger hiding (version, patch)
97 import Data.Text (Text, isInfixOf, unpack, pack)
98 import Data.Text.Lazy.IO as DTL
99 import Formatting (hprint, int, (%))
100 import GHC.Generics (Generic)
101 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
102 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
103 import Gargantext.API.Admin.Types (HasSettings)
104 import Gargantext.API.Job
105 import Gargantext.API.Ngrams.Types
106 import Gargantext.API.Prelude
107 import Gargantext.Core.NodeStory
108 import Gargantext.Core.Mail.Types (HasMail)
109 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
110 import Gargantext.API.Ngrams.Tools
111 import Gargantext.Database.Action.Flow.Types
112 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
113 import Gargantext.Database.Admin.Config (userMaster)
114 import Gargantext.Database.Admin.Types.Node (NodeType(..))
115 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
116 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
117 import Gargantext.Database.Query.Table.Node (getNode)
118 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
119 import Gargantext.Database.Query.Table.Node.Select
120 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
121 import Gargantext.Prelude hiding (log)
122 import Gargantext.Prelude.Clock (hasTime, getTime)
123 import Prelude (error)
124 import Servant hiding (Patch)
125 import Gargantext.Utils.Jobs (serveJobsAPI)
126 import System.IO (stderr)
127 import Test.QuickCheck (elements)
128 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
129 import qualified Data.Aeson.Text as DAT
130 import qualified Data.List as List
131 import qualified Data.Map.Strict as Map
132 import qualified Data.Map.Strict.Patch as PM
133 import qualified Data.Set as S
134 import qualified Data.Set as Set
135 import qualified Gargantext.API.Metrics as Metrics
136 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
139 -- TODO sequences of modifications (Patchs)
140 type NgramsIdPatch = Patch NgramsId NgramsPatch
142 ngramsPatch :: Int -> NgramsPatch
143 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
145 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
146 toEdit n p = Edit n p
147 ngramsIdPatch :: Patch NgramsId NgramsPatch
148 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
149 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
150 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
153 -- applyPatchBack :: Patch -> IO Patch
154 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158 ------------------------------------------------------------------------
161 -- TODO: Replace.old is ignored which means that if the current list
162 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
163 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
164 -- However this should not happen in non conflicting situations.
165 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
166 mkListsUpdate nt patches =
167 [ (ngramsTypeId nt, ng, listTypeId lt)
168 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
169 , lt <- patch ^.. patch_list . new
172 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
175 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
176 mkChildrenGroups addOrRem nt patches =
177 [ (ngramsTypeId nt, parent, child)
178 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
179 , child <- patch ^.. patch_children . to addOrRem . folded
183 ------------------------------------------------------------------------
185 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
188 saver <- view hasNodeStorySaver
190 --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
192 --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
195 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
197 saveNodeStoryImmediate = do
198 saver <- view hasNodeStoryImmediateSaver
200 --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
202 --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
-- | Placeholder for resolving a conflict between two 'ListType' values.
--
-- Both arguments are ignored and the result is bottom: forcing it aborts
-- with an 'error' that names this function, rather than the anonymous
-- failure produced by a bare @undefined@.
--
-- TODO Use Map User ListType
listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ =
  error "listTypeConflictResolution: not implemented (TODO Use Map User ListType)"
208 ngramsStatePatchConflictResolution
209 :: TableNgrams.NgramsType
211 -> ConflictResolutionNgramsPatch
212 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
213 = (ours, (const ours, ours), (False, False))
214 -- (False, False) means here that Mod always has priority.
215 -- = (ours, (const ours, ours), (True, False))
216 -- (True, False) <- would mean priority to the left (same as ours).
217 -- undefined {- TODO think this through -}, listTypeConflictResolution)
-- | Return the first argument, provided the second one is 'Nothing'.
--
-- A 'Just' in the second position is treated as a programming error and
-- aborts with 'error'.
--
-- Insertions are not considered as patches:
-- they do not extend history,
-- they do not bump version.
--
-- TODO error handling
insertNewOnly :: a -> Maybe b -> a
insertNewOnly fresh existing = case existing of
  Nothing -> fresh
  Just _  -> error "insertNewOnly: impossible"
231 -- TODO refactor with putListNgrams
232 copyListNgrams :: RepoCmdM env err m
233 => NodeId -> NodeId -> NgramsType
235 copyListNgrams srcListId dstListId ngramsType = do
237 liftBase $ modifyMVar_ var $
238 pure . (r_state . at ngramsType %~ (Just . f . something))
241 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
242 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
244 -- TODO refactor with putListNgrams
245 -- The list must be non-empty!
246 -- The added ngrams must be non-existent!
247 addListNgrams :: RepoCmdM env err m
248 => NodeId -> NgramsType
249 -> [NgramsElement] -> m ()
250 addListNgrams listId ngramsType nes = do
252 liftBase $ modifyMVar_ var $
253 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
256 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
259 -- | TODO: incr the Version number
260 -- && should use patch
263 setListNgrams :: HasNodeStory env err m
265 -> TableNgrams.NgramsType
266 -> Map NgramsTerm NgramsRepoElement
268 setListNgrams listId ngramsType ns = do
269 -- printDebug "[setListNgrams]" (listId, ngramsType)
270 getter <- view hasNodeStory
271 var <- liftBase $ (getter ^. nse_getter) [listId]
272 liftBase $ modifyMVar_ var $
-- | Build the list of 'Ngrams' for the terms of a state patch that carry a
-- @patch_new@ value.
--
-- Every (term, patch) pair found inside the table patches of the state patch
-- is visited; the term is emitted once per value reachable through
-- @patch_new . _Just@, converted with 'text2ngrams' after unwrapping the
-- 'NgramsTerm'.
newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
newNgramsFromNgramsStatePatch statePatch = do
  (term, termPatch) <- indexedPatches
  _ <- termPatch ^.. patch_new . _Just
  pure (text2ngrams (unNgramsTerm term))
  where
    -- All (term, patch) pairs across every entry of the outer patch map.
    indexedPatches =
      statePatch ^.. _PatchMap
                   . each . _NgramsTablePatch
                   . _PatchMap . ifolded . withIndex
295 commitStatePatch :: ( HasNodeStory env err m
296 , HasNodeStoryImmediateSaver env
297 , HasNodeArchiveStoryImmediateSaver env
300 -> Versioned NgramsStatePatch'
301 -> m (Versioned NgramsStatePatch')
302 commitStatePatch listId (Versioned _p_version p) = do
303 -- printDebug "[commitStatePatch]" listId
304 var <- getNodeStoryVar [listId]
305 archiveSaver <- view hasNodeArchiveStoryImmediateSaver
306 vq' <- liftBase $ modifyMVar var $ \ns -> do
308 a = ns ^. unNodeStory . at listId . _Just
309 -- apply patches from version p_version to a ^. a_version
311 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
312 q = mconcat $ a ^. a_history
314 printDebug "transformWith" (p,q)
317 (p', q') = transformWith ngramsStatePatchConflictResolution p q
318 a' = a & a_version +~ 1
320 & a_history %~ (p' :)
323 -- Ideally we would like to check these properties. However:
324 -- * They should be checked only to debug the code. The client data
325 -- should be able to trigger these.
326 -- * What kind of error should they throw (we are in IO here)?
327 -- * Should we keep modifyMVar?
328 -- * Should we throw the validation in an Exception, catch it around
329 -- modifyMVar and throw it back as an Error?
330 assertValid $ transformable p q
331 assertValid $ applicable p' (r ^. r_state)
333 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
334 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
335 let newNs = ( ns & unNodeStory . at listId .~ (Just a')
336 , Versioned (a' ^. a_version) q'
339 -- NOTE Now is the only good time to save the archive history. We
340 -- have the handle to the MVar and we need to save its exact
341 -- snapshot. Node Story archive is a linear table, so it's only
342 -- couple of inserts, it shouldn't take long...
344 -- If we postponed saving the archive to the debounce action, we
345 -- would have issues like
346 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
347 -- where the `q` computation from above (which uses the archive)
348 -- would cause incorrect patch application (before the previous
349 -- archive was saved and applied)
350 newNs' <- archiveSaver $ fst newNs
352 pure (newNs', snd newNs)
354 -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
356 --saveNodeStoryImmediate
358 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
364 -- This is a special case of tableNgramsPut where the input patch is empty.
365 tableNgramsPull :: HasNodeStory env err m
367 -> TableNgrams.NgramsType
369 -> m (Versioned NgramsTablePatch)
370 tableNgramsPull listId ngramsType p_version = do
371 printDebug "[tableNgramsPull]" (listId, ngramsType)
372 var <- getNodeStoryVar [listId]
373 r <- liftBase $ readMVar var
376 a = r ^. unNodeStory . at listId . _Just
377 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
378 q_table = q ^. _PatchMap . at ngramsType . _Just
380 pure (Versioned (a ^. a_version) q_table)
385 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
386 -- Apply the given patch to the DB and returns the patch to be applied on the
389 tableNgramsPut :: ( HasNodeStory env err m
390 , HasNodeStoryImmediateSaver env
391 , HasNodeArchiveStoryImmediateSaver env
392 , HasInvalidError err
398 -> Versioned NgramsTablePatch
399 -> m (Versioned NgramsTablePatch)
400 tableNgramsPut tabType listId (Versioned p_version p_table)
401 | p_table == mempty = do
402 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
403 let ngramsType = ngramsTypeFromTabType tabType
404 tableNgramsPull listId ngramsType p_version
407 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
408 let ngramsType = ngramsTypeFromTabType tabType
409 (p, p_validity) = PM.singleton ngramsType p_table
411 assertValid p_validity
413 ret <- commitStatePatch listId (Versioned p_version p)
414 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
420 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
425 => UpdateTableNgramsCharts
428 tableNgramsPostChartsAsync utn logStatus = do
429 let tabType = utn ^. utn_tab_type
430 let listId = utn ^. utn_list_id
432 node <- getNode listId
433 let nId = node ^. node_id
434 _uId = node ^. node_user_id
435 mCId = node ^. node_parent_id
437 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
438 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
442 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
443 pure $ jobLogFail $ jobLogInit 1
447 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
448 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
450 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
455 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
456 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
457 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
459 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
460 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
462 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
463 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
465 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
470 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
471 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
473 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
478 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
479 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
482 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
484 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
486 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
488 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
490 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
492 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
498 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
499 pure $ jobLogFail $ jobLogInit 1
502 { _ne_list :: ListType
503 If we merge the parents/children we can potentially create cycles!
504 , _ne_parent :: Maybe NgramsTerm
505 , _ne_children :: MSet NgramsTerm
509 getNgramsTableMap :: HasNodeStory env err m
511 -> TableNgrams.NgramsType
512 -> m (Versioned NgramsTableMap)
513 getNgramsTableMap nodeId ngramsType = do
514 v <- getNodeStoryVar [nodeId]
515 repo <- liftBase $ readMVar v
516 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
517 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
520 dumpJsonTableMap :: HasNodeStory env err m
523 -> TableNgrams.NgramsType
525 dumpJsonTableMap fpath nodeId ngramsType = do
526 m <- getNgramsTableMap nodeId ngramsType
527 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
534 -- | TODO Errors management
535 -- TODO: polymorphic for Annuaire or Corpus or ...
536 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
537 -- TODO: should take only one ListId
540 getTableNgrams :: forall env err m.
541 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
542 => NodeType -> NodeId -> TabType
543 -> ListId -> Limit -> Maybe Offset
545 -> Maybe MinSize -> Maybe MaxSize
547 -> (NgramsTerm -> Bool)
548 -> m (VersionedWithCount NgramsTable)
549 getTableNgrams _nType nId tabType listId limit_ offset
550 listType minSize maxSize orderBy searchQuery = do
553 -- lIds <- selectNodesWithUsername NodeList userMaster
555 ngramsType = ngramsTypeFromTabType tabType
556 offset' = maybe 0 identity offset
557 listType' = maybe (const True) (==) listType
558 minSize' = maybe (const True) (<=) minSize
559 maxSize' = maybe (const True) (>=) maxSize
561 rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
566 selected_node n = minSize' s
568 && searchQuery (n ^. ne_ngrams)
569 && listType' (n ^. ne_list)
573 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
575 ---------------------------------------
576 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
577 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
578 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
579 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to length)
580 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to length)
582 ---------------------------------------
583 -- | Filter the given `tableMap` with the search criteria.
584 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
585 filteredNodes tableMap = roots
587 list = tableMap ^.. each
588 selected_nodes = list & filter selected_node
589 roots = rootOf tableMap <$> selected_nodes
591 -- | Appends subitems (selected from `tableMap`) for given `roots`.
592 withInners :: Map NgramsTerm NgramsElement -> [NgramsElement] -> [NgramsElement]
593 withInners tableMap roots = roots <> inners
595 list = tableMap ^.. each
596 rootSet = Set.fromList (_ne_ngrams <$> roots)
597 inners = list & filter (selected_inner rootSet)
599 -- | Paginate the results
600 sortAndPaginate :: [NgramsElement] -> [NgramsElement]
601 sortAndPaginate = take limit_
603 . sortOnOrder orderBy
605 ---------------------------------------
607 let scoresNeeded = needsScores orderBy
610 tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
612 let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
614 let fltrCount = length $ fltr ^. v_data . _NgramsTable
617 let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
619 --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
622 ("getTableNgrams total=" % hasTime
626 % " sql=" % (if scoresNeeded then "map2" else "map3")
628 ) t0 t3 t0 t1 t1 t2 t2 t3
630 -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
631 pure $ toVersionedWithCount fltrCount tableMapSorted
634 -- | Helper function to get the ngrams table with scores.
635 getNgramsTable' :: forall env err m.
636 ( HasNodeStory env err m
638 , HasConnectionPool env
643 -> TableNgrams.NgramsType
644 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
645 getNgramsTable' nId listId ngramsType = do
646 tableMap <- getNgramsTableMap listId ngramsType
647 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
648 . Map.mapWithKey ngramsElementFromRepo
650 -- | Helper function to set scores on an `NgramsTable`.
651 setNgramsTableScores :: forall env err m t.
652 ( Each t t NgramsElement NgramsElement
653 , HasNodeStory env err m
655 , HasConnectionPool env
660 -> TableNgrams.NgramsType
663 setNgramsTableScores nId listId ngramsType table = do
665 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
666 --printDebug "[setNgramsTableScores] occurrences" occurrences
669 let ngrams_terms = table ^.. each . ne_ngrams
670 -- printDebug "ngrams_terms" ngrams_terms
672 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
673 (length ngrams_terms) t1 t2
675 setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
677 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
679 pure $ table & each %~ setOcc
684 scoresRecomputeTableNgrams :: forall env err m.
685 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
686 => NodeId -> TabType -> ListId -> m Int
687 scoresRecomputeTableNgrams nId tabType listId = do
688 tableMap <- getNgramsTableMap listId ngramsType
689 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
690 . Map.mapWithKey ngramsElementFromRepo
694 ngramsType = ngramsTypeFromTabType tabType
699 -- TODO: find a better place for the code above, All APIs stay here
-- | Ordering that can be requested for an ngrams table.
--
-- The declaration order of the constructors is what the derived 'Enum' and
-- 'Bounded' instances enumerate.
data OrderBy
  = TermAsc
  | TermDesc
  | ScoreAsc
  | ScoreDesc
  deriving (Generic, Enum, Bounded, Read, Show)
-- | Parse an 'OrderBy' from a URL piece.
--
-- Only the four exact constructor names are accepted; any other input is
-- rejected with a fixed error message.
instance FromHttpApiData OrderBy where
  parseUrlPiece piece = case piece of
    "TermAsc"   -> Right TermAsc
    "TermDesc"  -> Right TermDesc
    "ScoreAsc"  -> Right ScoreAsc
    "ScoreDesc" -> Right ScoreDesc
    _           -> Left "Unexpected value of OrderBy"
-- | Render an 'OrderBy' as a URL piece: the text of its 'show' output.
instance ToHttpApiData OrderBy where
  toUrlPiece order = pack (show order)
-- Method-less instances for 'OrderBy': no method is written here, so each
-- class falls back on its default definitions.
-- NOTE(review): presumably these defaults are the Generic-based ones
-- ('OrderBy' derives Generic) — confirm against the class definitions.
715 instance ToParamSchema OrderBy
716 instance FromJSON OrderBy
717 instance ToJSON OrderBy
718 instance ToSchema OrderBy
-- | Generator that picks one of the 'OrderBy' constructors, enumerated from
-- 'minBound' onwards.
instance Arbitrary OrderBy where
  arbitrary = elements [minBound ..]
-- | Tell whether the requested ordering is one of the score orderings.
--
-- 'True' for @Just ScoreAsc@ and @Just ScoreDesc@; 'False' for the term
-- orderings and for 'Nothing'.
--
-- NOTE(review): @sortOnOrder@ in @getTableNgrams@ treats 'Nothing' as
-- @Just ScoreDesc@, whereas this function answers 'False' for 'Nothing' —
-- confirm that the difference is intended.
needsScores :: Maybe OrderBy -> Bool
needsScores requested = case requested of
  Just ScoreAsc  -> True
  Just ScoreDesc -> True
  _              -> False
-- | Route type for reading the ngrams table.
--
-- Query parameters declared with @QueryParamR@: @ngramsType@, @list@,
-- @limit@.  Query parameters declared with @QueryParam@: @offset@,
-- @listType@, @minTermSize@, @maxTermSize@, @orderBy@, @search@.
-- (@QueryParamR@ is defined elsewhere; presumably the required variant.)
--
-- Answers a @GET@ with a JSON-encoded 'VersionedWithCount' 'NgramsTable'.
type TableNgramsApiGet =
     Summary " Table Ngrams API Get"
  :> QueryParamR "ngramsType"  TabType
  :> QueryParamR "list"        ListId
  :> QueryParamR "limit"       Limit
  :> QueryParam  "offset"      Offset
  :> QueryParam  "listType"    ListType
  :> QueryParam  "minTermSize" MinSize
  :> QueryParam  "maxTermSize" MaxSize
  :> QueryParam  "orderBy"     OrderBy
  :> QueryParam  "search"      Text
  :> Get '[JSON] (VersionedWithCount NgramsTable)
-- | Route type for changing the ngrams table.
--
-- Takes the @ngramsType@ and @list@ query parameters and a JSON request body
-- holding a 'Versioned' 'NgramsTablePatch'; answers a @PUT@ with a
-- JSON-encoded 'Versioned' 'NgramsTablePatch'.
type TableNgramsApiPut =
     Summary " Table Ngrams API Change"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> ReqBody '[JSON] (Versioned NgramsTablePatch)
  :> Put     '[JSON] (Versioned NgramsTablePatch)
-- | Route type for recomputing the scores of an ngrams table.
--
-- Takes the @ngramsType@ and @list@ query parameters, then the @recompute@
-- path segment.  Despite the @Get@ suffix in its name, the verb is @POST@;
-- the response is a JSON-encoded 'Int'.
type RecomputeScoresNgramsApiGet =
     Summary " Recompute scores for ngrams table"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> "recompute"
  :> Post '[JSON] Int
-- | Route type for reading a version number.
--
-- Takes the @ngramsType@ and @list@ query parameters and answers a @GET@
-- with a JSON-encoded 'Version'.
type TableNgramsApiGetVersion =
     Summary " Table Ngrams API Get Version"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> Get '[JSON] Version
-- | Complete ngrams-table API: the alternatives below, in this order.
--
-- 1. 'TableNgramsApiGet'
-- 2. 'TableNgramsApiPut'
-- 3. 'RecomputeScoresNgramsApiGet'
-- 4. 'TableNgramsApiGetVersion', mounted under the @version@ path segment
-- 5. 'TableNgramsAsyncApi'
--
-- A server for this type must supply its handlers in the same order.
type TableNgramsApi =
       TableNgramsApiGet
  :<|> TableNgramsApiPut
  :<|> RecomputeScoresNgramsApiGet
  :<|> "version" :> TableNgramsApiGetVersion
  :<|> TableNgramsAsyncApi
762 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
766 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
768 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
775 -> Maybe MinSize -> Maybe MaxSize
777 -> Maybe Text -- full text search
778 -> m (VersionedWithCount NgramsTable)
779 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
780 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
782 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
786 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
791 getTableNgramsVersion _nId _tabType listId = currentVersion listId
796 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
797 -- This line above looks like a waste of computation to finally get only the version.
798 -- See the comment about listNgramsChangedSince.
801 -- | Text search is deactivated for now for ngrams by doc only
802 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
804 -> ListId -> Limit -> Maybe Offset
806 -> Maybe MinSize -> Maybe MaxSize
808 -> Maybe Text -- full text search
809 -> m (VersionedWithCount NgramsTable)
810 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
811 ns <- selectNodesWithUsername NodeList userMaster
812 let ngramsType = ngramsTypeFromTabType tabType
813 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
814 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
815 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
819 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
820 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
822 :<|> scoresRecomputeTableNgrams cId
823 :<|> getTableNgramsVersion cId
824 :<|> apiNgramsAsync cId
826 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
827 apiNgramsTableDoc dId = getTableNgramsDoc dId
829 :<|> scoresRecomputeTableNgrams dId
830 :<|> getTableNgramsVersion dId
831 :<|> apiNgramsAsync dId
833 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
834 apiNgramsAsync _dId =
835 serveJobsAPI TableNgramsJob $ \i log ->
838 printDebug "tableNgramsPostChartsAsync" x
840 in tableNgramsPostChartsAsync i log'
842 -- Did the given list of ngrams change since the given version?
843 -- The returned value is versioned boolean value, meaning that one always retrieve the
845 -- If the given version is negative then one simply receives the latest version and True.
846 -- Using this function is more precise than simply comparing the latest version number
847 -- with the local version number. Indeed there might be no change to this particular list
848 -- and still the version number has changed because of other lists.
850 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
851 -- * currentVersion: good computation, good bandwidth, bad precision.
852 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
853 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
854 listNgramsChangedSince :: HasNodeStory env err m
855 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
856 listNgramsChangedSince listId ngramsType version
858 Versioned <$> currentVersion listId <*> pure True
860 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)