2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
25 module Gargantext.API.Ngrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
56 , NgramsRepoElement(..)
81 , VersionedWithCount(..)
83 , listNgramsChangedSince
87 import Control.Concurrent
88 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
89 import Control.Monad.Reader
90 import Data.Aeson hiding ((.=))
91 import Data.Either (Either(..))
93 import Data.Map.Strict (Map)
94 import Data.Maybe (fromMaybe)
96 import Data.Ord (Down(..))
97 import Data.Patch.Class (Action(act), Transformable(..), ours)
98 import Data.Swagger hiding (version, patch)
99 import Data.Text (Text, isInfixOf, unpack)
100 import Data.Text.Lazy.IO as DTL
101 import Formatting (hprint, int, (%))
102 import GHC.Generics (Generic)
103 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
104 import Gargantext.API.Admin.Types (HasSettings)
105 import Gargantext.API.Job
106 import Gargantext.API.Ngrams.Types
107 import Gargantext.API.Prelude
108 import Gargantext.Core.NodeStory
109 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
110 import Gargantext.API.Ngrams.Tools
111 import Gargantext.Core.Utils (something)
112 import Gargantext.Database.Action.Flow.Types
113 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
114 import Gargantext.Database.Admin.Config (userMaster)
115 import Gargantext.Database.Admin.Types.Node (NodeType(..))
116 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
117 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
118 import Gargantext.Database.Query.Table.Node (getNode)
119 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
120 import Gargantext.Database.Query.Table.Node.Select
121 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
122 import Gargantext.Prelude hiding (log)
123 import Gargantext.Prelude.Clock (hasTime, getTime)
124 import Prelude (error)
125 import Servant hiding (Patch)
126 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
127 import System.IO (stderr)
128 import Test.QuickCheck (elements)
129 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
130 import qualified Data.Aeson.Text as DAT
131 import qualified Data.List as List
132 import qualified Data.Map.Strict as Map
133 import qualified Data.Map.Strict.Patch as PM
134 import qualified Data.Set as S
135 import qualified Data.Set as Set
136 import qualified Gargantext.API.Metrics as Metrics
137 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
140 -- TODO sequences of modifications (Patchs)
141 type NgramsIdPatch = Patch NgramsId NgramsPatch
143 ngramsPatch :: Int -> NgramsPatch
144 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
146 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
147 toEdit n p = Edit n p
148 ngramsIdPatch :: Patch NgramsId NgramsPatch
149 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
150 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
151 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
154 -- applyPatchBack :: Patch -> IO Patch
155 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
157 ------------------------------------------------------------------------
158 ------------------------------------------------------------------------
159 ------------------------------------------------------------------------
162 -- TODO: Replace.old is ignored which means that if the current list
163 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
164 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
165 -- However this should not happen in non conflicting situations.
166 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
167 mkListsUpdate nt patches =
168 [ (ngramsTypeId nt, ng, listTypeId lt)
169 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
170 , lt <- patch ^.. patch_list . new
173 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
176 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
177 mkChildrenGroups addOrRem nt patches =
178 [ (ngramsTypeId nt, parent, child)
179 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
180 , child <- patch ^.. patch_children . to addOrRem . folded
184 ------------------------------------------------------------------------
186 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
188 saveRepo = liftBase =<< view repoSaver
189 saveRepo' :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
191 saveRepo' = liftBase =<< view hasNodeStorySaver
195 listTypeConflictResolution :: ListType -> ListType -> ListType
196 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
198 ngramsStatePatchConflictResolution
199 :: TableNgrams.NgramsType
202 -> ConflictResolutionNgramsPatch
203 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
204 = (ours, (const ours, ours), (False, False))
205 -- (False, False) mean here that Mod has always priority.
206 -- (True, False) <- would mean priority to the left (same as ours).
207 -- undefined {- TODO think this through -}, listTypeConflictResolution)
209 ngramsStatePatchConflictResolution'
210 :: TableNgrams.NgramsType
212 -> ConflictResolutionNgramsPatch
213 ngramsStatePatchConflictResolution' _ngramsType _ngramsTerm
214 = (ours, (const ours, ours), (False, False))
215 -- (False, False) mean here that Mod has always priority.
216 -- (True, False) <- would mean priority to the left (same as ours).
217 -- undefined {- TODO think this through -}, listTypeConflictResolution)
223 -- Insertions are not considered as patches,
224 -- they do not extend history,
225 -- they do not bump version.
226 insertNewOnly :: a -> Maybe b -> a
227 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
228 -- TODO error handling
231 -- TODO refactor with putListNgrams
232 copyListNgrams :: RepoCmdM env err m
233 => NodeId -> NodeId -> NgramsType
235 copyListNgrams srcListId dstListId ngramsType = do
237 liftBase $ modifyMVar_ var $
238 pure . (r_state . at ngramsType %~ (Just . f . something))
241 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
242 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
244 -- TODO refactor with putListNgrams
245 -- The list must be non-empty!
246 -- The added ngrams must be non-existent!
247 addListNgrams :: RepoCmdM env err m
248 => NodeId -> NgramsType
249 -> [NgramsElement] -> m ()
250 addListNgrams listId ngramsType nes = do
252 liftBase $ modifyMVar_ var $
253 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
256 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
259 -- | TODO: incr the Version number
260 -- && should use patch
263 setListNgrams :: RepoCmdM env err m
265 -> TableNgrams.NgramsType
266 -> Map NgramsTerm NgramsRepoElement
268 setListNgrams listId ngramsType ns = do
270 liftBase $ modifyMVar_ var $
273 %~ Just . (at listId .~ Just ns) . something
275 printDebug "List modified" NodeList
277 setListNgrams' :: HasNodeStory env err m
279 -> TableNgrams.NgramsType
280 -> Map NgramsTerm NgramsRepoElement
282 setListNgrams' listId ngramsType ns = do
283 getter <- view hasNodeStory
284 var <- liftBase $ (getter ^. nse_getter) listId
285 liftBase $ modifyMVar_ var $
295 currentVersion :: RepoCmdM env err m
299 r <- liftBase $ readMVar var
300 pure $ r ^. r_version
301 currentVersion' :: HasNodeStory env err m
302 => ListId -> m Version
303 currentVersion' listId = do
304 nls <- getRepo' [listId]
305 pure $ nls ^. unNodeStory . at listId . _Just . a_version
310 newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
311 newNgramsFromNgramsStatePatch p =
312 [ text2ngrams (unNgramsTerm n)
313 | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
314 , _ <- np ^.. patch_new . _Just
316 newNgramsFromNgramsStatePatch' :: NgramsStatePatch' -> [Ngrams]
317 newNgramsFromNgramsStatePatch' p =
318 [ text2ngrams (unNgramsTerm n)
319 | (n,np) <- p ^.. _PatchMap
320 -- . each . _PatchMap
321 . each . _NgramsTablePatch
322 . _PatchMap . ifolded . withIndex
323 , _ <- np ^.. patch_new . _Just
327 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
328 commitStatePatch :: RepoCmdM env err m
329 => Versioned NgramsStatePatch
330 -> m (Versioned NgramsStatePatch)
331 commitStatePatch (Versioned p_version p) = do
333 vq' <- liftBase $ modifyMVar var $ \r -> do
335 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
336 (p', q') = transformWith ngramsStatePatchConflictResolution p q
337 r' = r & r_version +~ 1
339 & r_history %~ (p' :)
341 -- Ideally we would like to check these properties. However:
342 -- * They should be checked only to debug the code. The client data
343 -- should be able to trigger these.
344 -- * What kind of error should they throw (we are in IO here)?
345 -- * Should we keep modifyMVar?
346 -- * Should we throw the validation in an Exception, catch it around
347 -- modifyMVar and throw it back as an Error?
348 assertValid $ transformable p q
349 assertValid $ applicable p' (r ^. r_state)
351 pure (r', Versioned (r' ^. r_version) q')
356 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
360 commitStatePatch' :: HasNodeStory env err m
362 -> Versioned NgramsStatePatch'
363 -> m (Versioned NgramsStatePatch')
364 commitStatePatch' listId (Versioned p_version p) = do
365 var <- getRepoVar listId
366 vq' <- liftBase $ modifyMVar var $ \ns -> do
368 a = ns ^. unNodeStory . at listId . _Just
369 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
370 (p', q') = transformWith ngramsStatePatchConflictResolution' p q
371 a' = a & a_version +~ 1
373 & a_history %~ (p' :)
374 pure ( ns & unNodeStory . at listId .~ (Just a')
375 , Versioned (a' ^. a_version) q'
379 _ <- insertNgrams (newNgramsFromNgramsStatePatch' p)
385 -- This is a special case of tableNgramsPut where the input patch is empty.
386 tableNgramsPull :: RepoCmdM env err m
388 -> TableNgrams.NgramsType
390 -> m (Versioned NgramsTablePatch)
391 tableNgramsPull listId ngramsType p_version = do
393 r <- liftBase $ readMVar var
396 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
397 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
399 pure (Versioned (r ^. r_version) q_table)
401 -- Apply the given patch to the DB and returns the patch to be applied on the
404 tableNgramsPut :: ( FlowCmdM env err m
409 -> Versioned NgramsTablePatch
410 -> m (Versioned NgramsTablePatch)
411 tableNgramsPut tabType listId (Versioned p_version p_table)
412 | p_table == mempty = do
413 let ngramsType = ngramsTypeFromTabType tabType
414 tableNgramsPull listId ngramsType p_version
417 let ngramsType = ngramsTypeFromTabType tabType
418 (p0, p0_validity) = PM.singleton listId p_table
419 (p, p_validity) = PM.singleton ngramsType p0
421 assertValid p0_validity
422 assertValid p_validity
424 ret <- commitStatePatch (Versioned p_version p)
425 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
430 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
434 => UpdateTableNgramsCharts
437 tableNgramsPostChartsAsync utn logStatus = do
438 let tabType = utn ^. utn_tab_type
439 let listId = utn ^. utn_list_id
441 node <- getNode listId
442 let nId = node ^. node_id
443 _uId = node ^. node_user_id
444 mCId = node ^. node_parent_id
446 printDebug "[tableNgramsPut] tabType" tabType
447 printDebug "[tableNgramsPut] listId" listId
451 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
452 pure $ jobLogFail $ jobLogInit 1
456 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
457 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
459 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
464 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
465 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
466 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
468 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
469 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
471 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
472 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
474 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
479 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
480 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
482 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
487 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
488 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
491 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
493 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
495 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
497 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
499 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
501 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
507 printDebug "[tableNgramsPut] no update for tabType = " tabType
508 pure $ jobLogFail $ jobLogInit 1
511 { _ne_list :: ListType
512 If we merge the parents/children we can potentially create cycles!
513 , _ne_parent :: Maybe NgramsTerm
514 , _ne_children :: MSet NgramsTerm
518 getNgramsTableMap :: RepoCmdM env err m
520 -> TableNgrams.NgramsType
521 -> m (Versioned NgramsTableMap)
522 getNgramsTableMap nodeId ngramsType = do
524 repo <- liftBase $ readMVar v
525 pure $ Versioned (repo ^. r_version)
526 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
528 dumpJsonTableMap :: RepoCmdM env err m
531 -> TableNgrams.NgramsType
533 dumpJsonTableMap fpath nodeId ngramsType = do
534 m <- getNgramsTableMap nodeId ngramsType
535 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
541 -- | TODO Errors management
542 -- TODO: polymorphic for Annuaire or Corpus or ...
543 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
544 -- TODO: should take only one ListId
547 getTableNgrams :: forall env err m.
548 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
549 => NodeType -> NodeId -> TabType
550 -> ListId -> Limit -> Maybe Offset
552 -> Maybe MinSize -> Maybe MaxSize
554 -> (NgramsTerm -> Bool)
555 -> m (VersionedWithCount NgramsTable)
556 getTableNgrams _nType nId tabType listId limit_ offset
557 listType minSize maxSize orderBy searchQuery = do
560 -- lIds <- selectNodesWithUsername NodeList userMaster
562 ngramsType = ngramsTypeFromTabType tabType
563 offset' = maybe 0 identity offset
564 listType' = maybe (const True) (==) listType
565 minSize' = maybe (const True) (<=) minSize
566 maxSize' = maybe (const True) (>=) maxSize
568 selected_node n = minSize' s
570 && searchQuery (n ^. ne_ngrams)
571 && listType' (n ^. ne_list)
575 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
577 ---------------------------------------
578 sortOnOrder Nothing = identity
579 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
580 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
581 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
582 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
584 ---------------------------------------
586 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
587 filteredNodes tableMap = rootOf <$> list & filter selected_node
589 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
593 list = tableMap ^.. each
595 ---------------------------------------
596 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
597 selectAndPaginate tableMap = roots <> inners
599 list = tableMap ^.. each
600 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
604 selected_nodes = list & take limit_
606 . filter selected_node
607 . sortOnOrder orderBy
608 roots = rootOf <$> selected_nodes
609 rootsSet = Set.fromList (_ne_ngrams <$> roots)
610 inners = list & filter (selected_inner rootsSet)
612 ---------------------------------------
613 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
614 setScores False table = pure table
615 setScores True table = do
616 let ngrams_terms = table ^.. each . ne_ngrams
618 occurrences <- getOccByNgramsOnlyFast' nId
623 liftBase $ hprint stderr
624 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
625 (length ngrams_terms) t1 t2
627 occurrences <- getOccByNgramsOnlySlow nType nId
633 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
635 pure $ table & each %~ setOcc
636 ---------------------------------------
638 -- lists <- catMaybes <$> listsWith userMaster
639 -- trace (show lists) $
640 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
643 let scoresNeeded = needsScores orderBy
644 tableMap1 <- getNgramsTableMap listId ngramsType
646 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
647 . Map.mapWithKey ngramsElementFromRepo
649 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
651 let fltrCount = length $ fltr ^. v_data . _NgramsTable
654 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
655 . setScores (not scoresNeeded)
658 liftBase $ hprint stderr
659 ("getTableNgrams total=" % hasTime
663 % " sql=" % (if scoresNeeded then "map2" else "map3")
665 ) t0 t3 t0 t1 t1 t2 t2 t3
666 pure $ toVersionedWithCount fltrCount tableMap3
669 scoresRecomputeTableNgrams :: forall env err m.
670 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
671 => NodeId -> TabType -> ListId -> m Int
672 scoresRecomputeTableNgrams nId tabType listId = do
673 tableMap <- getNgramsTableMap listId ngramsType
674 _ <- tableMap & v_data %%~ setScores
675 . Map.mapWithKey ngramsElementFromRepo
679 ngramsType = ngramsTypeFromTabType tabType
681 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
683 let ngrams_terms = table ^.. each . ne_ngrams
684 occurrences <- getOccByNgramsOnlyFast' nId
689 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
691 pure $ table & each %~ setOcc
697 -- TODO: find a better place for the code above, All APIs stay here
699 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
700 deriving (Generic, Enum, Bounded, Read, Show)
702 instance FromHttpApiData OrderBy
704 parseUrlPiece "TermAsc" = pure TermAsc
705 parseUrlPiece "TermDesc" = pure TermDesc
706 parseUrlPiece "ScoreAsc" = pure ScoreAsc
707 parseUrlPiece "ScoreDesc" = pure ScoreDesc
708 parseUrlPiece _ = Left "Unexpected value of OrderBy"
711 instance ToParamSchema OrderBy
712 instance FromJSON OrderBy
713 instance ToJSON OrderBy
714 instance ToSchema OrderBy
715 instance Arbitrary OrderBy
717 arbitrary = elements [minBound..maxBound]
719 needsScores :: Maybe OrderBy -> Bool
720 needsScores (Just ScoreAsc) = True
721 needsScores (Just ScoreDesc) = True
722 needsScores _ = False
724 type TableNgramsApiGet = Summary " Table Ngrams API Get"
725 :> QueryParamR "ngramsType" TabType
726 :> QueryParamR "list" ListId
727 :> QueryParamR "limit" Limit
728 :> QueryParam "offset" Offset
729 :> QueryParam "listType" ListType
730 :> QueryParam "minTermSize" MinSize
731 :> QueryParam "maxTermSize" MaxSize
732 :> QueryParam "orderBy" OrderBy
733 :> QueryParam "search" Text
734 :> Get '[JSON] (VersionedWithCount NgramsTable)
736 type TableNgramsApiPut = Summary " Table Ngrams API Change"
737 :> QueryParamR "ngramsType" TabType
738 :> QueryParamR "list" ListId
739 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
740 :> Put '[JSON] (Versioned NgramsTablePatch)
742 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
743 :> QueryParamR "ngramsType" TabType
744 :> QueryParamR "list" ListId
745 :> "recompute" :> Post '[JSON] Int
747 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
748 :> QueryParamR "ngramsType" TabType
749 :> QueryParamR "list" ListId
750 :> Get '[JSON] Version
752 type TableNgramsApi = TableNgramsApiGet
753 :<|> TableNgramsApiPut
754 :<|> RecomputeScoresNgramsApiGet
755 :<|> "version" :> TableNgramsApiGetVersion
756 :<|> TableNgramsAsyncApi
758 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
762 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
764 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
771 -> Maybe MinSize -> Maybe MaxSize
773 -> Maybe Text -- full text search
774 -> m (VersionedWithCount NgramsTable)
775 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
776 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
778 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
780 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
785 getTableNgramsVersion _nId _tabType _listId = currentVersion
787 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
788 -- This line above looks like a waste of computation to finally get only the version.
789 -- See the comment about listNgramsChangedSince.
792 -- | Text search is deactivated for now for ngrams by doc only
793 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
795 -> ListId -> Limit -> Maybe Offset
797 -> Maybe MinSize -> Maybe MaxSize
799 -> Maybe Text -- full text search
800 -> m (VersionedWithCount NgramsTable)
801 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
802 ns <- selectNodesWithUsername NodeList userMaster
803 let ngramsType = ngramsTypeFromTabType tabType
804 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
805 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
806 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
810 apiNgramsTableCorpus :: ( GargServerC env err m
812 => NodeId -> ServerT TableNgramsApi m
813 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
815 :<|> scoresRecomputeTableNgrams cId
816 :<|> getTableNgramsVersion cId
817 :<|> apiNgramsAsync cId
819 apiNgramsTableDoc :: ( GargServerC env err m
821 => DocId -> ServerT TableNgramsApi m
822 apiNgramsTableDoc dId = getTableNgramsDoc dId
824 :<|> scoresRecomputeTableNgrams dId
825 :<|> getTableNgramsVersion dId
826 :<|> apiNgramsAsync dId
827 -- > index all the corpus accordingly (TODO AD)
829 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
830 apiNgramsAsync _dId =
832 JobFunction $ \i log ->
835 printDebug "tableNgramsPostChartsAsync" x
837 in tableNgramsPostChartsAsync i log'
839 -- Did the given list of ngrams changed since the given version?
840 -- The returned value is versioned boolean value, meaning that one always retrieve the
842 -- If the given version is negative then one simply receive the latest version and True.
843 -- Using this function is more precise than simply comparing the latest version number
844 -- with the local version number. Indeed there might be no change to this particular list
845 -- and still the version number has changed because of other lists.
847 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
848 -- * currentVersion: good computation, good bandwidth, bad precision.
849 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
850 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
851 listNgramsChangedSince :: RepoCmdM env err m
852 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
853 listNgramsChangedSince listId ngramsType version
855 Versioned <$> currentVersion <*> pure True
857 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)