-{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
{-|
Module : Gargantext.API.Ngrams
Description : Server API
-- | TODO
get ngrams filtered by NgramsType
-add get
+add get
-}
+{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
+
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE IncoherentInstances #-}
module Gargantext.API.Ngrams
( TableNgramsApi
, TableNgramsApiGet
, TableNgramsApiPut
, getTableNgrams
+ , getTableNgramsCorpus
, setListNgrams
--, rmListNgrams TODO fix before exporting
, apiNgramsTableCorpus
, apiNgramsTableDoc
- , NgramsStatePatch
, NgramsTablePatch
, NgramsTableMap
, r_version
, r_state
, r_history
- , NgramsRepo
, NgramsRepoElement(..)
- , saveRepo
+ , saveNodeStory
+ , saveNodeStoryImmediate
, initRepo
- , RepoEnv(..)
- , renv_var
- , renv_lock
-
, TabType(..)
- , HasRepoVar(..)
- , HasRepoSaver(..)
- , HasRepo(..)
- , RepoCmdM
, QueryParamR
, TODO
, tableNgramsPull
, tableNgramsPut
+ , getNgramsTable'
+ , setNgramsTableScores
+
, Version
, Versioned(..)
+ , VersionedWithCount(..)
, currentVersion
, listNgramsChangedSince
+ , MinSize, MaxSize, OrderBy, NgramsTable
+ , UpdateTableNgramsCharts
)
where
import Control.Concurrent
-import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
+import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Monad.Reader
import Data.Aeson hiding ((.=))
-import qualified Data.Aeson.Text as DAT
import Data.Either (Either(..))
import Data.Foldable
-import qualified Data.List as List
import Data.Map.Strict (Map)
-import qualified Data.Map.Strict as Map
-import qualified Data.Map.Strict.Patch as PM
import Data.Maybe (fromMaybe)
import Data.Monoid
import Data.Ord (Down(..))
import Data.Patch.Class (Action(act), Transformable(..), ours)
-import qualified Data.Set as S
-import qualified Data.Set as Set
import Data.Swagger hiding (version, patch)
-import Data.Text (Text, isInfixOf, unpack)
+import Data.Text (Text, isInfixOf, toLower, unpack, pack)
import Data.Text.Lazy.IO as DTL
import Formatting (hprint, int, (%))
-import Formatting.Clock (timeSpecs)
import GHC.Generics (Generic)
-import Servant hiding (Patch)
-import System.Clock (getTime, TimeSpec, Clock(..))
-import Servant.Job.Async (JobFunction(..), serveJobsAPI)
-import System.IO (stderr)
-import Test.QuickCheck (elements)
-import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
-
-import Prelude (error)
-import Gargantext.Prelude hiding (log)
-
+import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
-import qualified Gargantext.API.Metrics as Metrics
+import Gargantext.API.Job
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude
-import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
-import Gargantext.Core.Utils (something)
--- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
--- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
+import Gargantext.Core.NodeStory
+import Gargantext.Core.Mail.Types (HasMail)
+import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
+import Gargantext.API.Ngrams.Tools
import Gargantext.Database.Action.Flow.Types
-import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
+import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..))
-import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
+import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
+import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
+import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select
-import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
+import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
+import Gargantext.Prelude hiding (log)
+import Gargantext.Prelude.Clock (hasTime, getTime)
+import Prelude (error)
+import Servant hiding (Patch)
+import Gargantext.Utils.Jobs (serveJobsAPI)
+import System.IO (stderr)
+import Test.QuickCheck (elements)
+import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
+import qualified Data.Aeson.Text as DAT
+import qualified Data.List as List
+import qualified Data.Map.Strict as Map
+import qualified Data.Map.Strict.Patch as PM
+import qualified Data.Set as S
+import qualified Data.Set as Set
+import qualified Gargantext.API.Metrics as Metrics
import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
-import Gargantext.Database.Query.Table.Node (getNode)
-import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
-import Gargantext.Prelude.Job
{-
-- TODO sequences of modifications (Patchs)
------------------------------------------------------------------------
-saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
+saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
+ => m ()
+saveNodeStory = do
+ saver <- view hasNodeStorySaver
+ liftBase $ do
+ --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
+ saver
+ --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
+
+
+saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
=> m ()
-saveRepo = liftBase =<< view repoSaver
+saveNodeStoryImmediate = do
+ saver <- view hasNodeStoryImmediateSaver
+ liftBase $ do
+ --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
+ saver
+ --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
+
ngramsStatePatchConflictResolution
:: TableNgrams.NgramsType
- -> NodeId
-> NgramsTerm
-> ConflictResolutionNgramsPatch
-ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
- = (ours, (const ours, ours), (False, False))
+ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
+ = (ours, (const ours, ours), (False, False))
-- (False, False) mean here that Mod has always priority.
+ -- = (ours, (const ours, ours), (True, False))
-- (True, False) <- would mean priority to the left (same as ours).
-
-- undefined {- TODO think this through -}, listTypeConflictResolution)
+
+
+
-- Current state:
-- Insertions are not considered as patches,
-- they do not extend history,
var <- view repoVar
liftBase $ modifyMVar_ var $
pure . (r_state . at ngramsType %~ (Just . f . something))
- saveRepo
+ saveNodeStory
where
f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
var <- view repoVar
liftBase $ modifyMVar_ var $
pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
- saveRepo
+ saveNodeStory
where
m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
-}
--- UNSAFE
-rmListNgrams :: RepoCmdM env err m
- => ListId
- -> TableNgrams.NgramsType
- -> m ()
-rmListNgrams l nt = setListNgrams l nt mempty
-
-- | TODO: incr the Version number
-- && should use patch
-- UNSAFE
-setListNgrams :: RepoCmdM env err m
+
+setListNgrams :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> Map NgramsTerm NgramsRepoElement
-> m ()
setListNgrams listId ngramsType ns = do
- var <- view repoVar
+ -- printDebug "[setListNgrams]" (listId, ngramsType)
+ getter <- view hasNodeStory
+ var <- liftBase $ (getter ^. nse_getter) [listId]
liftBase $ modifyMVar_ var $
- pure . ( r_state
- . at ngramsType %~
- (Just .
- (at listId .~ ( Just ns))
- . something
- )
+ pure . ( unNodeStory
+ . at listId . _Just
+ . a_state
+ . at ngramsType
+ .~ Just ns
)
- saveRepo
-
+ saveNodeStory
-currentVersion :: RepoCmdM env err m
- => m Version
-currentVersion = do
- var <- view repoVar
- r <- liftBase $ readMVar var
- pure $ r ^. r_version
-newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
+newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
newNgramsFromNgramsStatePatch p =
[ text2ngrams (unNgramsTerm n)
- | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
+ | (n,np) <- p ^.. _PatchMap
+ -- . each . _PatchMap
+ . each . _NgramsTablePatch
+ . _PatchMap . ifolded . withIndex
, _ <- np ^.. patch_new . _Just
]
--- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
-commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
-commitStatePatch (Versioned p_version p) = do
- var <- view repoVar
- vq' <- liftBase $ modifyMVar var $ \r -> do
+
+
+
+commitStatePatch :: ( HasNodeStory env err m
+ , HasNodeStoryImmediateSaver env
+ , HasNodeArchiveStoryImmediateSaver env
+ , HasMail env)
+ => ListId
+ -> Versioned NgramsStatePatch'
+ -> m (Versioned NgramsStatePatch')
+commitStatePatch listId (Versioned _p_version p) = do
+ -- printDebug "[commitStatePatch]" listId
+ var <- getNodeStoryVar [listId]
+ archiveSaver <- view hasNodeArchiveStoryImmediateSaver
+ vq' <- liftBase $ modifyMVar var $ \ns -> do
+ let
+ a = ns ^. unNodeStory . at listId . _Just
+ -- apply patches from version p_version to a ^. a_version
+ -- TODO Check this
+ --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
+ q = mconcat $ a ^. a_history
+
+ --printDebug "[commitStatePatch] transformWith" (p,q)
+ -- let tws s = case s of
+ -- (Mod p) -> "Mod"
+ -- _ -> "Rpl"
+ -- printDebug "[commitStatePatch] transformWith" (tws $ p ^. _NgramsPatch, tws $ q ^. _NgramsPatch)
+
let
- q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
(p', q') = transformWith ngramsStatePatchConflictResolution p q
- r' = r & r_version +~ 1
- & r_state %~ act p'
- & r_history %~ (p' :)
+ a' = a & a_version +~ 1
+ & a_state %~ act p'
+ & a_history %~ (p' :)
+
{-
-- Ideally we would like to check these properties. However:
-- * They should be checked only to debug the code. The client data
assertValid $ transformable p q
assertValid $ applicable p' (r ^. r_state)
-}
- pure (r', Versioned (r' ^. r_version) q')
+ -- printDebug "[commitStatePatch] a version" (a ^. a_version)
+ -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
+ let newNs = ( ns & unNodeStory . at listId .~ (Just a')
+ , Versioned (a' ^. a_version) q'
+ )
+
+ -- NOTE Now is the only good time to save the archive history. We
+ -- have the handle to the MVar and we need to save its exact
+ -- snapshot. Node Story archive is a linear table, so it's only
+ -- couple of inserts, it shouldn't take long...
+
+ -- If we postponed saving the archive to the debounce action, we
+ -- would have issues like
+ -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
+ -- where the `q` computation from above (which uses the archive)
+ -- would cause incorrect patch application (before the previous
+ -- archive was saved and applied)
+ newNs' <- archiveSaver $ fst newNs
+
+ pure (newNs', snd newNs)
+
+ -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
+ saveNodeStory
+ --saveNodeStoryImmediate
+ -- Save new ngrams
+ _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
- saveRepo
pure vq'
+
+
-- This is a special case of tableNgramsPut where the input patch is empty.
-tableNgramsPull :: RepoCmdM env err m
+tableNgramsPull :: HasNodeStory env err m
=> ListId
-> TableNgrams.NgramsType
-> Version
-> m (Versioned NgramsTablePatch)
tableNgramsPull listId ngramsType p_version = do
- var <- view repoVar
+ printDebug "[tableNgramsPull]" (listId, ngramsType)
+ var <- getNodeStoryVar [listId]
r <- liftBase $ readMVar var
let
- q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
- q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
+ a = r ^. unNodeStory . at listId . _Just
+ q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
+ q_table = q ^. _PatchMap . at ngramsType . _Just
+
+ pure (Versioned (a ^. a_version) q_table)
- pure (Versioned (r ^. r_version) q_table)
+
+
+-- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
-- Apply the given patch to the DB and returns the patch to be applied on the
-- client.
-- TODO-ACCESS check
-tableNgramsPut :: ( FlowCmdM env err m
- , HasSettings env
+tableNgramsPut :: ( HasNodeStory env err m
+ , HasNodeStoryImmediateSaver env
+ , HasNodeArchiveStoryImmediateSaver env
+ , HasInvalidError err
+ , HasSettings env
+ , HasMail env
)
=> TabType
-> ListId
-> m (Versioned NgramsTablePatch)
tableNgramsPut tabType listId (Versioned p_version p_table)
| p_table == mempty = do
+ printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
let ngramsType = ngramsTypeFromTabType tabType
tableNgramsPull listId ngramsType p_version
| otherwise = do
+ printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
let ngramsType = ngramsTypeFromTabType tabType
- (p0, p0_validity) = PM.singleton listId p_table
- (p, p_validity) = PM.singleton ngramsType p0
+ (p, p_validity) = PM.singleton ngramsType p_table
- assertValid p0_validity
assertValid p_validity
- ret <- commitStatePatch (Versioned p_version p)
- <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
+ ret <- commitStatePatch listId (Versioned p_version p)
+ <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
pure ret
-tableNgramsPostChartsAsync :: ( FlowCmdM env err m
+
+tableNgramsPostChartsAsync :: ( HasNodeStory env err m
+ , FlowCmdM env err m
, HasNodeError err
, HasSettings env
)
node <- getNode listId
let nId = node ^. node_id
- _uId = node ^. node_userId
- mCId = node ^. node_parentId
+ _uId = node ^. node_user_id
+ mCId = node ^. node_parent_id
- printDebug "[tableNgramsPut] tabType" tabType
- printDebug "[tableNgramsPut] listId" listId
+ -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
+ -- printDebug "[tableNgramsPostChartsAsync] listId" listId
case mCId of
Nothing -> do
- printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
+ printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
pure $ jobLogFail $ jobLogInit 1
Just cId -> do
case tabType of
Authors -> do
- -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
+ -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
logRef
_ <- Metrics.updatePie cId (Just listId) tabType Nothing
getRef
Institutes -> do
- -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
- -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
+ -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
+ -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
logRef
_ <- Metrics.updateTree cId (Just listId) tabType StopTerm
- -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
+ -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
- -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
+ -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType MapTerm
logRefSuccess
getRef
Sources -> do
- -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
+ -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
logRef
_ <- Metrics.updatePie cId (Just listId) tabType Nothing
getRef
Terms -> do
- -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
+ -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
logRef
{-
getRef
_ -> do
- printDebug "[tableNgramsPut] no update for tabType = " tabType
+ printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
pure $ jobLogFail $ jobLogInit 1
{-
}
-}
-getNgramsTableMap :: RepoCmdM env err m
+getNgramsTableMap :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> m (Versioned NgramsTableMap)
getNgramsTableMap nodeId ngramsType = do
- v <- view repoVar
+ v <- getNodeStoryVar [nodeId]
repo <- liftBase $ readMVar v
- pure $ Versioned (repo ^. r_version)
- (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
+ pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
+ (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
+
-dumpJsonTableMap :: RepoCmdM env err m
+dumpJsonTableMap :: HasNodeStory env err m
=> Text
-> NodeId
-> TableNgrams.NgramsType
liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
pure ()
+
type MinSize = Int
type MaxSize = Int
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId
-getTime' :: MonadBase IO m => m TimeSpec
-getTime' = liftBase $ getTime ProcessCPUTime
-
getTableNgrams :: forall env err m.
- (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
+ (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
=> NodeType -> NodeId -> TabType
- -> ListId -> Limit -> Maybe Offset
+ -> ListId -> Limit -> Maybe Offset
-> Maybe ListType
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> (NgramsTerm -> Bool)
- -> m (Versioned NgramsTable)
+ -> m (VersionedWithCount NgramsTable)
getTableNgrams _nType nId tabType listId limit_ offset
listType minSize maxSize orderBy searchQuery = do
- t0 <- getTime'
+ t0 <- getTime
-- lIds <- selectNodesWithUsername NodeList userMaster
let
ngramsType = ngramsTypeFromTabType tabType
minSize' = maybe (const True) (<=) minSize
maxSize' = maybe (const True) (>=) maxSize
+ rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
+ (tableMap ^. at r)
+ )
+ (ne ^. ne_root)
+
selected_node n = minSize' s
&& maxSize' s
&& searchQuery (n ^. ne_ngrams)
selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
---------------------------------------
- sortOnOrder Nothing = identity
+ sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
- sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
- sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
+ sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to List.nub . to length)
+ sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to List.nub . to length)
---------------------------------------
- selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
- selectAndPaginate tableMap = roots <> inners
+ -- | Filter the given `tableMap` with the search criteria.
+ filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
+ filteredNodes tableMap = roots
where
list = tableMap ^.. each
- rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
- (ne ^. ne_root)
- selected_nodes = list & take limit_
- . drop offset'
- . filter selected_node
- . sortOnOrder orderBy
- roots = rootOf <$> selected_nodes
- rootsSet = Set.fromList (_ne_ngrams <$> roots)
- inners = list & filter (selected_inner rootsSet)
+ selected_nodes = list & filter selected_node
+ roots = rootOf tableMap <$> selected_nodes
- ---------------------------------------
- setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
- setScores False table = pure table
- setScores True table = do
- let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
- t1 <- getTime'
- occurrences <- getOccByNgramsOnlyFast' nId
- listId
- ngramsType
- ngrams_terms
- t2 <- getTime'
- liftBase $ hprint stderr
- ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
- (length ngrams_terms) t1 t2
- {-
- occurrences <- getOccByNgramsOnlySlow nType nId
- (lIds <> [listId])
- ngramsType
- ngrams_terms
- -}
- let
- setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
+ -- | Appends subitems (selected from `tableMap`) for given `roots`.
+ withInners :: Map NgramsTerm NgramsElement -> [NgramsElement] -> [NgramsElement]
+ withInners tableMap roots = roots <> inners
+ where
+ list = tableMap ^.. each
+ rootSet = Set.fromList (_ne_ngrams <$> roots)
+ inners = list & filter (selected_inner rootSet)
- pure $ table & each %~ setOcc
- ---------------------------------------
+ -- | Paginate the results
+ sortAndPaginate :: [NgramsElement] -> [NgramsElement]
+ sortAndPaginate = take limit_
+ . drop offset'
+ . sortOnOrder orderBy
- -- lists <- catMaybes <$> listsWith userMaster
- -- trace (show lists) $
- -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
+ ---------------------------------------
let scoresNeeded = needsScores orderBy
- tableMap1 <- getNgramsTableMap listId ngramsType
- t1 <- getTime'
- tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
- . Map.mapWithKey ngramsElementFromRepo
- t2 <- getTime'
- tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
- . setScores (not scoresNeeded)
- . selectAndPaginate
- t3 <- getTime'
- liftBase $ hprint stderr
- ("getTableNgrams total=" % timeSpecs
- % " map1=" % timeSpecs
- % " map2=" % timeSpecs
- % " map3=" % timeSpecs
- % " sql=" % (if scoresNeeded then "map2" else "map3")
- % "\n"
- ) t0 t3 t0 t1 t1 t2 t2 t3
- pure tableMap3
-
-
-scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
+ t1 <- getTime
+
+ tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
+
+ let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
+
+ let fltrCount = length $ fltr ^. v_data . _NgramsTable
+
+ t2 <- getTime
+ let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
+ t3 <- getTime
+ --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
+ liftBase $ do
+ hprint stderr
+ ("getTableNgrams total=" % hasTime
+ % " map1=" % hasTime
+ % " map2=" % hasTime
+ % " map3=" % hasTime
+ % " sql=" % (if scoresNeeded then "map2" else "map3")
+ % "\n"
+ ) t0 t3 t0 t1 t1 t2 t2 t3
+
+ -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
+ pure $ toVersionedWithCount fltrCount tableMapSorted
+
+
+-- | Helper function to get the ngrams table with scores.
+getNgramsTable' :: forall env err m.
+ ( HasNodeStory env err m
+ , HasNodeError err
+ , HasConnectionPool env
+ , HasConfig env
+ , HasMail env)
+ => NodeId
+ -> ListId
+ -> TableNgrams.NgramsType
+ -> m (Versioned (Map.Map NgramsTerm NgramsElement))
+getNgramsTable' nId listId ngramsType = do
+ tableMap <- getNgramsTableMap listId ngramsType
+ tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
+ . Map.mapWithKey ngramsElementFromRepo
+
+-- | Helper function to set scores on an `NgramsTable`.
+setNgramsTableScores :: forall env err m t.
+ ( Each t t NgramsElement NgramsElement
+ , HasNodeStory env err m
+ , HasNodeError err
+ , HasConnectionPool env
+ , HasConfig env
+ , HasMail env)
+ => NodeId
+ -> ListId
+ -> TableNgrams.NgramsType
+ -> t
+ -> m t
+setNgramsTableScores nId listId ngramsType table = do
+ t1 <- getTime
+ occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
+ --printDebug "[setNgramsTableScores] occurrences" occurrences
+ t2 <- getTime
+ liftBase $ do
+ let ngrams_terms = table ^.. each . ne_ngrams
+ -- printDebug "ngrams_terms" ngrams_terms
+ hprint stderr
+ ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
+ (length ngrams_terms) t1 t2
+ let
+ setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
+
+ --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
+
+ pure $ table & each %~ setOcc
+
+
+
+
+scoresRecomputeTableNgrams :: forall env err m.
+ (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
+ => NodeId -> TabType -> ListId -> m Int
scoresRecomputeTableNgrams nId tabType listId = do
tableMap <- getNgramsTableMap listId ngramsType
- _ <- tableMap & v_data %%~ setScores
+ _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
. Map.mapWithKey ngramsElementFromRepo
pure $ 1
where
ngramsType = ngramsTypeFromTabType tabType
- setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
- setScores table = do
- let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
- occurrences <- getOccByNgramsOnlyFast' nId
- listId
- ngramsType
- ngrams_terms
- let
- setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
-
- pure $ table & each %~ setOcc
-
-
-- APIs
parseUrlPiece "ScoreDesc" = pure ScoreDesc
parseUrlPiece _ = Left "Unexpected value of OrderBy"
+instance ToHttpApiData OrderBy where
+ toUrlPiece = pack . show
instance ToParamSchema OrderBy
instance FromJSON OrderBy
:> QueryParam "maxTermSize" MaxSize
:> QueryParam "orderBy" OrderBy
:> QueryParam "search" Text
- :> Get '[JSON] (Versioned NgramsTable)
+ :> Get '[JSON] (VersionedWithCount NgramsTable)
type TableNgramsApiPut = Summary " Table Ngrams API Change"
:> QueryParamR "ngramsType" TabType
:> "update"
:> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
-getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
+getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
=> NodeId
-> TabType
-> ListId
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> Maybe Text -- full text search
- -> m (Versioned NgramsTable)
+ -> m (VersionedWithCount NgramsTable)
getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
where
- searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
+ searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
+
+
-getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
+getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
=> NodeId
-> TabType
-> ListId
-> m Version
-getTableNgramsVersion _nId _tabType _listId = currentVersion
+getTableNgramsVersion _nId _tabType listId = currentVersion listId
+
+
+
-- TODO: limit?
-- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
-- This line above looks like a waste of computation to finally get only the version.
-- | Text search is deactivated for now for ngrams by doc only
-getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
+getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
=> DocId -> TabType
-> ListId -> Limit -> Maybe Offset
-> Maybe ListType
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> Maybe Text -- full text search
- -> m (Versioned NgramsTable)
+ -> m (VersionedWithCount NgramsTable)
getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
ns <- selectNodesWithUsername NodeList userMaster
let ngramsType = ngramsTypeFromTabType tabType
-apiNgramsTableCorpus :: ( GargServerC env err m
- )
- => NodeId -> ServerT TableNgramsApi m
-apiNgramsTableCorpus cId = getTableNgramsCorpus cId
+apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
+apiNgramsTableCorpus cId = getTableNgramsCorpus cId
:<|> tableNgramsPut
:<|> scoresRecomputeTableNgrams cId
- :<|> getTableNgramsVersion cId
- :<|> apiNgramsAsync cId
+ :<|> getTableNgramsVersion cId
+ :<|> apiNgramsAsync cId
-apiNgramsTableDoc :: ( GargServerC env err m
- )
- => DocId -> ServerT TableNgramsApi m
-apiNgramsTableDoc dId = getTableNgramsDoc dId
+apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
+apiNgramsTableDoc dId = getTableNgramsDoc dId
:<|> tableNgramsPut
:<|> scoresRecomputeTableNgrams dId
- :<|> getTableNgramsVersion dId
- :<|> apiNgramsAsync dId
- -- > index all the corpus accordingly (TODO AD)
+ :<|> getTableNgramsVersion dId
+ :<|> apiNgramsAsync dId
-apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
+apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
apiNgramsAsync _dId =
- serveJobsAPI $
- JobFunction $ \i log ->
+ serveJobsAPI TableNgramsJob $ \i log ->
let
log' x = do
printDebug "tableNgramsPostChartsAsync" x
-- * currentVersion: good computation, good bandwidth, bad precision.
-- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
-- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
-listNgramsChangedSince :: RepoCmdM env err m
+listNgramsChangedSince :: HasNodeStory env err m
=> ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
listNgramsChangedSince listId ngramsType version
| version < 0 =
- Versioned <$> currentVersion <*> pure True
+ Versioned <$> currentVersion listId <*> pure True
| otherwise =
tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)