-{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
{-|
Module : Gargantext.API.Ngrams
Description : Server API
-- | TODO
get ngrams filtered by NgramsType
-add get
+add get
-}
+{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
+
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
-{-# OPTIONS -fno-warn-orphans #-}
+{-# LANGUAGE IncoherentInstances #-}
module Gargantext.API.Ngrams
( TableNgramsApi
, TableNgramsApiGet
, TableNgramsApiPut
- , TableNgramsApiPost
, getTableNgrams
+ , getTableNgramsCorpus
, setListNgrams
- , rmListNgrams
- , putListNgrams
- , putListNgrams'
- , tableNgramsPost
+ --, rmListNgrams TODO fix before exporting
, apiNgramsTableCorpus
, apiNgramsTableDoc
- , NgramsStatePatch
, NgramsTablePatch
, NgramsTableMap
+ , NgramsTerm(..)
+
, NgramsElement(..)
, mkNgramsElement
- , mergeNgramsElement
, RootParent(..)
, r_version
, r_state
, r_history
- , NgramsRepo
, NgramsRepoElement(..)
- , saveRepo
+ , saveNodeStory
+ , saveNodeStoryImmediate
, initRepo
- , RepoEnv(..)
- , renv_var
- , renv_lock
-
, TabType(..)
- , ngramsTypeFromTabType
- , HasRepoVar(..)
- , HasRepoSaver(..)
- , HasRepo(..)
- , RepoCmdM
, QueryParamR
, TODO
-- Internals
, getNgramsTableMap
+ , dumpJsonTableMap
, tableNgramsPull
, tableNgramsPut
+ , getNgramsTable'
+ , setNgramsTableScores
+
, Version
, Versioned(..)
+ , VersionedWithCount(..)
, currentVersion
, listNgramsChangedSince
+ , MinSize, MaxSize, OrderBy, NgramsTable
+ , UpdateTableNgramsCharts
)
where
-import Codec.Serialise (Serialise())
-import Control.Category ((>>>))
import Control.Concurrent
-import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
-import Control.Monad.Base (MonadBase, liftBase)
-import Control.Monad.Error.Class (MonadError)
+import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Monad.Reader
-import Control.Monad.State
-import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Aeson hiding ((.=))
-import Data.Aeson.TH (deriveJSON)
-import Data.Either(Either(Left))
-import Data.Either.Extra (maybeToEither)
+import Data.Either (Either(..))
import Data.Foldable
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Data.Monoid
import Data.Ord (Down(..))
-import Data.Patch.Class (Replace, replace, Action(act), Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours)
-import Data.Set (Set)
+import Data.Patch.Class (Action(act), Transformable(..), ours)
import Data.Swagger hiding (version, patch)
-import Data.Text (Text, isInfixOf, count)
-import Data.Validity
-import Database.PostgreSQL.Simple.FromField (FromField, fromField)
+import Data.Text (Text, isInfixOf, toLower, unpack, pack)
+import Data.Text.Lazy.IO as DTL
import Formatting (hprint, int, (%))
-import Formatting.Clock (timeSpecs)
import GHC.Generics (Generic)
-import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
-import Gargantext.Core.Types (TODO)
-import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
-import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
-import Gargantext.Database.Query.Table.Node.Select
-import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
+import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
+import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
+import Gargantext.API.Admin.Types (HasSettings)
+import Gargantext.API.Job
+import Gargantext.API.Ngrams.Types
+import Gargantext.API.Prelude
+import Gargantext.Core.NodeStory
+import Gargantext.Core.Mail.Types (HasMail)
+import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
+import Gargantext.API.Ngrams.Tools
+import Gargantext.Database.Action.Flow.Types
+import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Admin.Config (userMaster)
-import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Admin.Types.Node (NodeType(..))
-import Gargantext.Database.Prelude (fromField', HasConnectionPool)
-import Gargantext.Prelude
-import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
+import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
+import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
+import Gargantext.Database.Query.Table.Node (getNode)
+import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
+import Gargantext.Database.Query.Table.Node.Select
+import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
+import Gargantext.Prelude hiding (log)
+import Gargantext.Prelude.Clock (hasTime, getTime)
+import Prelude (error)
import Servant hiding (Patch)
-import System.Clock (getTime, TimeSpec, Clock(..))
-import System.FileLock (FileLock)
+import Gargantext.Utils.Jobs (serveJobsAPI)
import System.IO (stderr)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
-import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
+import qualified Data.Aeson.Text as DAT
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Map.Strict.Patch as PM
import qualified Data.Set as S
import qualified Data.Set as Set
+import qualified Gargantext.API.Metrics as Metrics
import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
-------------------------------------------------------------------------
---data FacetFormat = Table | Chart
-data TabType = Docs | Trash | MoreFav | MoreTrash
- | Terms | Sources | Authors | Institutes
- | Contacts
- deriving (Generic, Enum, Bounded, Show)
-
-instance FromHttpApiData TabType
- where
- parseUrlPiece "Docs" = pure Docs
- parseUrlPiece "Trash" = pure Trash
- parseUrlPiece "MoreFav" = pure MoreFav
- parseUrlPiece "MoreTrash" = pure MoreTrash
-
- parseUrlPiece "Terms" = pure Terms
- parseUrlPiece "Sources" = pure Sources
- parseUrlPiece "Institutes" = pure Institutes
- parseUrlPiece "Authors" = pure Authors
-
- parseUrlPiece "Contacts" = pure Contacts
-
- parseUrlPiece _ = Left "Unexpected value of TabType"
-
-instance ToParamSchema TabType
-instance ToJSON TabType
-instance FromJSON TabType
-instance ToSchema TabType
-instance Arbitrary TabType
- where
- arbitrary = elements [minBound .. maxBound]
-
-newtype MSet a = MSet (Map a ())
- deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
-
-instance ToJSON a => ToJSON (MSet a) where
- toJSON (MSet m) = toJSON (Map.keys m)
- toEncoding (MSet m) = toEncoding (Map.keys m)
-
-mSetFromSet :: Set a -> MSet a
-mSetFromSet = MSet . Map.fromSet (const ())
-
-mSetFromList :: Ord a => [a] -> MSet a
-mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
-
--- mSetToSet :: Ord a => MSet a -> Set a
--- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
-mSetToSet :: Ord a => MSet a -> Set a
-mSetToSet = Set.fromList . mSetToList
-
-mSetToList :: MSet a -> [a]
-mSetToList (MSet a) = Map.keys a
-
-instance Foldable MSet where
- foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
-
-instance (Ord a, FromJSON a) => FromJSON (MSet a) where
- parseJSON = fmap mSetFromList . parseJSON
-
-instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
- -- TODO
- declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
-
-------------------------------------------------------------------------
-type NgramsTerm = Text
-
-data RootParent = RootParent
- { _rp_root :: NgramsTerm
- , _rp_parent :: NgramsTerm
- }
- deriving (Ord, Eq, Show, Generic)
-
-deriveJSON (unPrefix "_rp_") ''RootParent
-makeLenses ''RootParent
-
-data NgramsRepoElement = NgramsRepoElement
- { _nre_size :: Int
- , _nre_list :: ListType
---, _nre_root_parent :: Maybe RootParent
- , _nre_root :: Maybe NgramsTerm
- , _nre_parent :: Maybe NgramsTerm
- , _nre_children :: MSet NgramsTerm
- }
- deriving (Ord, Eq, Show, Generic)
-
-deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
-makeLenses ''NgramsRepoElement
-
-instance ToSchema NgramsRepoElement where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
-
-instance Serialise (MSet NgramsTerm)
-instance Serialise NgramsRepoElement
-
-data NgramsElement =
- NgramsElement { _ne_ngrams :: NgramsTerm
- , _ne_size :: Int
- , _ne_list :: ListType
- , _ne_occurrences :: Int
- , _ne_root :: Maybe NgramsTerm
- , _ne_parent :: Maybe NgramsTerm
- , _ne_children :: MSet NgramsTerm
- }
- deriving (Ord, Eq, Show, Generic)
-
-deriveJSON (unPrefix "_ne_") ''NgramsElement
-makeLenses ''NgramsElement
-
-mkNgramsElement :: NgramsTerm
- -> ListType
- -> Maybe RootParent
- -> MSet NgramsTerm
- -> NgramsElement
-mkNgramsElement ngrams list rp children =
- NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
- where
- -- TODO review
- size = 1 + count " " ngrams
-
-newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
-newNgramsElement mayList ngrams =
- mkNgramsElement ngrams (fromMaybe MapTerm mayList) Nothing mempty
-
-instance ToSchema NgramsElement where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
-instance Arbitrary NgramsElement where
- arbitrary = elements [newNgramsElement Nothing "sport"]
-
-ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
-ngramsElementToRepo
- (NgramsElement { _ne_size = s
- , _ne_list = l
- , _ne_root = r
- , _ne_parent = p
- , _ne_children = c
- }) =
- NgramsRepoElement
- { _nre_size = s
- , _nre_list = l
- , _nre_parent = p
- , _nre_root = r
- , _nre_children = c
- }
-
-ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
-ngramsElementFromRepo
- ngrams
- (NgramsRepoElement
- { _nre_size = s
- , _nre_list = l
- , _nre_parent = p
- , _nre_root = r
- , _nre_children = c
- }) =
- NgramsElement { _ne_size = s
- , _ne_list = l
- , _ne_root = r
- , _ne_parent = p
- , _ne_children = c
- , _ne_ngrams = ngrams
- , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
- {-
- -- Here we could use 0 if we want to avoid any `panic`.
- -- It will not happen using getTableNgrams if
- -- getOccByNgramsOnly provides a count of occurrences for
- -- all the ngrams given.
- -}
- }
-
-------------------------------------------------------------------------
-newtype NgramsTable = NgramsTable [NgramsElement]
- deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
-
-type NgramsList = NgramsTable
-
-makePrisms ''NgramsTable
-
--- | Question: why these repetition of Type in this instance
--- may you document it please ?
-instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
- each = _NgramsTable . each
-
--- TODO discuss
--- | TODO Check N and Weight
-{-
-toNgramsElement :: [NgramsTableData] -> [NgramsElement]
-toNgramsElement ns = map toNgramsElement' ns
- where
- toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
- where
- p' = case p of
- Nothing -> Nothing
- Just x -> lookup x mapParent
- c' = maybe mempty identity $ lookup t mapChildren
- lt' = maybe (panic "API.Ngrams: listypeId") identity lt
-
- mapParent :: Map Int Text
- mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
-
- mapChildren :: Map Text (Set Text)
- mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
- $ Map.fromListWith (<>)
- $ map (first fromJust)
- $ filter (isJust . fst)
- $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
--}
-
-mockTable :: NgramsTable
-mockTable = NgramsTable
- [ mkNgramsElement "animal" MapTerm Nothing (mSetFromList ["dog", "cat"])
- , mkNgramsElement "cat" MapTerm (rp "animal") mempty
- , mkNgramsElement "cats" StopTerm Nothing mempty
- , mkNgramsElement "dog" MapTerm (rp "animal") (mSetFromList ["dogs"])
- , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
- , mkNgramsElement "fox" MapTerm Nothing mempty
- , mkNgramsElement "object" CandidateTerm Nothing mempty
- , mkNgramsElement "nothing" StopTerm Nothing mempty
- , mkNgramsElement "organic" MapTerm Nothing (mSetFromList ["flower"])
- , mkNgramsElement "flower" MapTerm (rp "organic") mempty
- , mkNgramsElement "moon" CandidateTerm Nothing mempty
- , mkNgramsElement "sky" StopTerm Nothing mempty
- ]
- where
- rp n = Just $ RootParent n n
-
-instance Arbitrary NgramsTable where
- arbitrary = pure mockTable
-
-instance ToSchema NgramsTable
-
-------------------------------------------------------------------------
-type NgramsTableMap = Map NgramsTerm NgramsRepoElement
-------------------------------------------------------------------------
--- On the Client side:
---data Action = InGroup NgramsId NgramsId
--- | OutGroup NgramsId NgramsId
--- | SetListType NgramsId ListType
-
-data PatchSet a = PatchSet
- { _rem :: Set a
- , _add :: Set a
- }
- deriving (Eq, Ord, Show, Generic)
-
-makeLenses ''PatchSet
-makePrisms ''PatchSet
-
-instance ToJSON a => ToJSON (PatchSet a) where
- toJSON = genericToJSON $ unPrefix "_"
- toEncoding = genericToEncoding $ unPrefix "_"
-
-instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
- parseJSON = genericParseJSON $ unPrefix "_"
-
-{-
-instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
- arbitrary = PatchSet <$> arbitrary <*> arbitrary
-
-type instance Patched (PatchSet a) = Set a
-
-type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
-type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
-
-instance Ord a => Semigroup (PatchSet a) where
- p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
- , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
- } -- TODO Review
-
-instance Ord a => Monoid (PatchSet a) where
- mempty = PatchSet mempty mempty
-
-instance Ord a => Group (PatchSet a) where
- invert (PatchSet r a) = PatchSet a r
-
-instance Ord a => Composable (PatchSet a) where
- composable _ _ = undefined
-
-instance Ord a => Action (PatchSet a) (Set a) where
- act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
-
-instance Applicable (PatchSet a) (Set a) where
- applicable _ _ = mempty
-
-instance Ord a => Validity (PatchSet a) where
- validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
-
-instance Ord a => Transformable (PatchSet a) where
- transformable = undefined
-
- conflicts _p _q = undefined
-
- transformWith conflict p q = undefined conflict p q
-
-instance ToSchema a => ToSchema (PatchSet a)
--}
-
-type AddRem = Replace (Maybe ())
-
-instance Serialise AddRem
-
-remPatch, addPatch :: AddRem
-remPatch = replace (Just ()) Nothing
-addPatch = replace Nothing (Just ())
-
-isRem :: Replace (Maybe ()) -> Bool
-isRem = (== remPatch)
-
-type PatchMap = PM.PatchMap
-
-
-newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
- deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
- Transformable, Composable)
-
-type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
-type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
-
-instance (Serialise a, Ord a) => Serialise (PatchMap a AddRem)
-instance (Serialise a, Ord a) => Serialise (PatchMSet a)
-
--- TODO this breaks module abstraction
-makePrisms ''PM.PatchMap
-
-makePrisms ''PatchMSet
-
-_PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
-_PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
- where
- f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
- f = Map.partition isRem >>> both %~ Map.keysSet
-
- g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
- g (rems, adds) = Map.fromSet (const remPatch) rems
- <> Map.fromSet (const addPatch) adds
-
-instance Ord a => Action (PatchMSet a) (MSet a) where
- act (PatchMSet p) (MSet m) = MSet $ act p m
-
-instance Ord a => Applicable (PatchMSet a) (MSet a) where
- applicable (PatchMSet p) (MSet m) = applicable p m
-
-instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
- toJSON = toJSON . view _PatchMSetIso
- toEncoding = toEncoding . view _PatchMSetIso
-
-instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
- parseJSON = fmap (_PatchMSetIso #) . parseJSON
-
-instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
- arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
-
-instance ToSchema a => ToSchema (PatchMSet a) where
- -- TODO
- declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
-
-type instance Patched (PatchMSet a) = MSet a
-
-instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
- arbitrary = uncurry replace <$> arbitrary
- -- If they happen to be equal then the patch is Keep.
-
-instance ToSchema a => ToSchema (Replace a) where
- declareNamedSchema (_ :: Proxy (Replace a)) = do
- -- TODO Keep constructor is not supported here.
- aSchema <- declareSchemaRef (Proxy :: Proxy a)
- return $ NamedSchema (Just "Replace") $ mempty
- & type_ ?~ SwaggerObject
- & properties .~
- InsOrdHashMap.fromList
- [ ("old", aSchema)
- , ("new", aSchema)
- ]
- & required .~ [ "old", "new" ]
-
-data NgramsPatch =
- NgramsPatch { _patch_children :: PatchMSet NgramsTerm
- , _patch_list :: Replace ListType -- TODO Map UserId ListType
- }
- deriving (Eq, Show, Generic)
-
-deriveJSON (unPrefix "_") ''NgramsPatch
-makeLenses ''NgramsPatch
-
-instance ToSchema NgramsPatch where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
-
-instance Arbitrary NgramsPatch where
- arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
-
-instance Serialise NgramsPatch
-instance Serialise (Replace ListType)
-instance Serialise ListType
-
-type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
-
-_NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
-_NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
-
-instance Semigroup NgramsPatch where
- p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
-
-instance Monoid NgramsPatch where
- mempty = _NgramsPatch # mempty
-
-instance Validity NgramsPatch where
- validate p = p ^. _NgramsPatch . to validate
-
-instance Transformable NgramsPatch where
- transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
-
- conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
-
- transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
- where
- (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
-
-type ConflictResolutionNgramsPatch =
- ( ConflictResolutionPatchMSet NgramsTerm
- , ConflictResolutionReplace ListType
- )
-type instance ConflictResolution NgramsPatch =
- ConflictResolutionNgramsPatch
-
-type PatchedNgramsPatch = (Set NgramsTerm, ListType)
- -- ~ Patched NgramsPatchIso
-type instance Patched NgramsPatch = PatchedNgramsPatch
-
-instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
- applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
- applicable p (Just nre) =
- applicable (p ^. patch_children) (nre ^. nre_children) <>
- applicable (p ^. patch_list) (nre ^. nre_list)
-
-instance Action NgramsPatch NgramsRepoElement where
- act p = (nre_children %~ act (p ^. patch_children))
- . (nre_list %~ act (p ^. patch_list))
-
-instance Action NgramsPatch (Maybe NgramsRepoElement) where
- act = fmap . act
-
-newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
- deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
-
-instance Serialise NgramsTablePatch
-instance Serialise (PatchMap NgramsTerm NgramsPatch)
-
-instance FromField NgramsTablePatch
- where
- fromField = fromField'
-
-instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
- where
- fromField = fromField'
-
---instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
---
-type instance ConflictResolution NgramsTablePatch =
- NgramsTerm -> ConflictResolutionNgramsPatch
-
-type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
- -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
-type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
-
-makePrisms ''NgramsTablePatch
-instance ToSchema (PatchMap NgramsTerm NgramsPatch)
-instance ToSchema NgramsTablePatch
-
-instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
- applicable p = applicable (p ^. _NgramsTablePatch)
-
-instance Action NgramsTablePatch (Maybe NgramsTableMap) where
- act p =
- fmap (execState (reParentNgramsTablePatch p)) .
- act (p ^. _NgramsTablePatch)
-
-instance Arbitrary NgramsTablePatch where
- arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
-
--- Should it be less than an Lens' to preserve PatchMap's abstraction.
--- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
--- ntp_ngrams_patches = _NgramsTablePatch . undefined
-
-type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
-
-reRootChildren :: NgramsTerm -> ReParent NgramsTerm
-reRootChildren root ngram = do
- nre <- use $ at ngram
- forOf_ (_Just . nre_children . folded) nre $ \child -> do
- at child . _Just . nre_root ?= root
- reRootChildren root child
-
-reParent :: Maybe RootParent -> ReParent NgramsTerm
-reParent rp child = do
- at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
- . (nre_root .~ (_rp_root <$> rp))
- )
- reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
-
-reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
-reParentAddRem rp child p =
- reParent (if isRem p then Nothing else Just rp) child
-
-reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
-reParentNgramsPatch parent ngramsPatch = do
- root_of_parent <- use (at parent . _Just . nre_root)
- let
- root = fromMaybe parent root_of_parent
- rp = RootParent { _rp_root = root, _rp_parent = parent }
- itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
- -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
-
-reParentNgramsTablePatch :: ReParent NgramsTablePatch
-reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
- -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
-
-------------------------------------------------------------------------
-------------------------------------------------------------------------
-type Version = Int
-
-data Versioned a = Versioned
- { _v_version :: Version
- , _v_data :: a
- }
- deriving (Generic, Show, Eq)
-deriveJSON (unPrefix "_v_") ''Versioned
-makeLenses ''Versioned
-instance ToSchema a => ToSchema (Versioned a) where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
-instance Arbitrary a => Arbitrary (Versioned a) where
- arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
-
-
{-
-- TODO sequences of modifications (Patchs)
type NgramsIdPatch = Patch NgramsId NgramsPatch
]
-}
-ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
-ngramsTypeFromTabType tabType =
- let lieu = "Garg.API.Ngrams: " :: Text in
- case tabType of
- Sources -> TableNgrams.Sources
- Authors -> TableNgrams.Authors
- Institutes -> TableNgrams.Institutes
- Terms -> TableNgrams.NgramsTerms
- _ -> panic $ lieu <> "No Ngrams for this tab"
- -- TODO: This `panic` would disapear with custom NgramsType.
-
------------------------------------------------------------------------
-data Repo s p = Repo
- { _r_version :: Version
- , _r_state :: s
- , _r_history :: [p]
- -- first patch in the list is the most recent
- }
- deriving (Generic)
-
-instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
- parseJSON = genericParseJSON $ unPrefix "_r_"
-
-instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
- toJSON = genericToJSON $ unPrefix "_r_"
- toEncoding = genericToEncoding $ unPrefix "_r_"
-
-instance (Serialise s, Serialise p) => Serialise (Repo s p)
-
-makeLenses ''Repo
-
-initRepo :: Monoid s => Repo s p
-initRepo = Repo 1 mempty []
-
-type NgramsRepo = Repo NgramsState NgramsStatePatch
-type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
-type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
-
-instance Serialise (PM.PatchMap NodeId NgramsTablePatch)
-instance Serialise NgramsStatePatch
-
-initMockRepo :: NgramsRepo
-initMockRepo = Repo 1 s []
- where
- s = Map.singleton TableNgrams.NgramsTerms
- $ Map.singleton 47254
- $ Map.fromList
- [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
-
-data RepoEnv = RepoEnv
- { _renv_var :: !(MVar NgramsRepo)
- , _renv_saver :: !(IO ())
- , _renv_lock :: !FileLock
- }
- deriving (Generic)
-
-makeLenses ''RepoEnv
-
-class HasRepoVar env where
- repoVar :: Getter env (MVar NgramsRepo)
-
-instance HasRepoVar (MVar NgramsRepo) where
- repoVar = identity
-class HasRepoSaver env where
- repoSaver :: Getter env (IO ())
-
-class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
- repoEnv :: Getter env RepoEnv
-
-instance HasRepo RepoEnv where
- repoEnv = identity
-
-instance HasRepoVar RepoEnv where
- repoVar = renv_var
-
-instance HasRepoSaver RepoEnv where
- repoSaver = renv_saver
+saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
+ => m ()
+saveNodeStory = do
+ saver <- view hasNodeStorySaver
+ liftBase $ do
+ --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
+ saver
+ --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
-type RepoCmdM env err m =
- ( MonadReader env m
- , MonadError err m
- , MonadBaseControl IO m
- , HasRepo env
- )
-------------------------------------------------------------------------
-saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
+saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
=> m ()
-saveRepo = liftBase =<< view repoSaver
+saveNodeStoryImmediate = do
+ saver <- view hasNodeStoryImmediateSaver
+ liftBase $ do
+ --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
+ saver
+ --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
+
ngramsStatePatchConflictResolution
:: TableNgrams.NgramsType
- -> NodeId
-> NgramsTerm
-> ConflictResolutionNgramsPatch
-ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
- = (const ours, ours)
+ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
+ = (ours, (const ours, ours), (False, False))
+ -- (False, False) mean here that Mod has always priority.
+ -- = (ours, (const ours, ours), (True, False))
+ -- (True, False) <- would mean priority to the left (same as ours).
-- undefined {- TODO think this through -}, listTypeConflictResolution)
+
+
+
-- Current state:
-- Insertions are not considered as patches,
-- they do not extend history,
insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
-- TODO error handling
-something :: Monoid a => Maybe a -> a
-something Nothing = mempty
-something (Just a) = a
-
{- unused
-- TODO refactor with putListNgrams
copyListNgrams :: RepoCmdM env err m
var <- view repoVar
liftBase $ modifyMVar_ var $
pure . (r_state . at ngramsType %~ (Just . f . something))
- saveRepo
+ saveNodeStory
where
f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
var <- view repoVar
liftBase $ modifyMVar_ var $
pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
- saveRepo
+ saveNodeStory
where
m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
-}
-rmListNgrams :: RepoCmdM env err m
- => ListId
- -> TableNgrams.NgramsType
- -> m ()
-rmListNgrams l nt = setListNgrams l nt mempty
-
-- | TODO: incr the Version number
-- && should use patch
-setListNgrams :: RepoCmdM env err m
+-- UNSAFE
+
+setListNgrams :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> Map NgramsTerm NgramsRepoElement
-> m ()
setListNgrams listId ngramsType ns = do
- var <- view repoVar
+ -- printDebug "[setListNgrams]" (listId, ngramsType)
+ getter <- view hasNodeStory
+ var <- liftBase $ (getter ^. nse_getter) [listId]
liftBase $ modifyMVar_ var $
- pure . ( r_state
- . at ngramsType %~
- (Just .
- (at listId .~ ( Just ns))
- . something
- )
+ pure . ( unNodeStory
+ . at listId . _Just
+ . a_state
+ . at ngramsType
+ .~ Just ns
)
- saveRepo
+ saveNodeStory
--- If the given list of ngrams elements contains ngrams already in
--- the repo, they will be ignored.
-putListNgrams :: RepoCmdM env err m
- => NodeId
- -> TableNgrams.NgramsType
- -> [NgramsElement] -> m ()
-putListNgrams _ _ [] = pure ()
-putListNgrams listId ngramsType nes = putListNgrams' listId ngramsType m
- where
- m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
+newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
+newNgramsFromNgramsStatePatch p =
+ [ text2ngrams (unNgramsTerm n)
+ | (n,np) <- p ^.. _PatchMap
+ -- . each . _PatchMap
+ . each . _NgramsTablePatch
+ . _PatchMap . ifolded . withIndex
+ , _ <- np ^.. patch_new . _Just
+ ]
-putListNgrams' :: RepoCmdM env err m
- => ListId
- -> TableNgrams.NgramsType
- -> Map NgramsTerm NgramsRepoElement
- -> m ()
-putListNgrams' listId ngramsType ns = do
- -- printDebug "putListNgrams" (length nes)
- var <- view repoVar
- liftBase $ modifyMVar_ var $
- pure . ( r_state
- . at ngramsType %~
- (Just .
- (at listId %~
- ( Just
- . (<> ns)
- . something
- )
- )
- . something
- )
- )
- saveRepo
--- TODO-ACCESS check
-tableNgramsPost :: RepoCmdM env err m
- => TabType
- -> NodeId
- -> Maybe ListType
- -> [NgramsTerm] -> m ()
-tableNgramsPost tabType listId mayList =
- putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
-
-currentVersion :: RepoCmdM env err m
- => m Version
-currentVersion = do
- var <- view repoVar
- r <- liftBase $ readMVar var
- pure $ r ^. r_version
-tableNgramsPull :: RepoCmdM env err m
+commitStatePatch :: ( HasNodeStory env err m
+ , HasNodeStoryImmediateSaver env
+ , HasNodeArchiveStoryImmediateSaver env
+ , HasMail env)
+ => ListId
+ -> Versioned NgramsStatePatch'
+ -> m (Versioned NgramsStatePatch')
+commitStatePatch listId (Versioned _p_version p) = do
+ -- printDebug "[commitStatePatch]" listId
+ var <- getNodeStoryVar [listId]
+ archiveSaver <- view hasNodeArchiveStoryImmediateSaver
+ vq' <- liftBase $ modifyMVar var $ \ns -> do
+ let
+ a = ns ^. unNodeStory . at listId . _Just
+ -- apply patches from version p_version to a ^. a_version
+ -- TODO Check this
+ --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
+ q = mconcat $ a ^. a_history
+
+ --printDebug "[commitStatePatch] transformWith" (p,q)
+ -- let tws s = case s of
+ -- (Mod p) -> "Mod"
+ -- _ -> "Rpl"
+ -- printDebug "[commitStatePatch] transformWith" (tws $ p ^. _NgramsPatch, tws $ q ^. _NgramsPatch)
+
+ let
+ (p', q') = transformWith ngramsStatePatchConflictResolution p q
+ a' = a & a_version +~ 1
+ & a_state %~ act p'
+ & a_history %~ (p' :)
+
+ {-
+ -- Ideally we would like to check these properties. However:
+ -- * They should be checked only to debug the code. The client data
+ -- should be able to trigger these.
+ -- * What kind of error should they throw (we are in IO here)?
+ -- * Should we keep modifyMVar?
+ -- * Should we throw the validation in an Exception, catch it around
+ -- modifyMVar and throw it back as an Error?
+ assertValid $ transformable p q
+ assertValid $ applicable p' (r ^. r_state)
+ -}
+ -- printDebug "[commitStatePatch] a version" (a ^. a_version)
+ -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
+ let newNs = ( ns & unNodeStory . at listId .~ (Just a')
+ , Versioned (a' ^. a_version) q'
+ )
+
+ -- NOTE Now is the only good time to save the archive history. We
+ -- have the handle to the MVar and we need to save its exact
+ -- snapshot. Node Story archive is a linear table, so it's only
+ -- couple of inserts, it shouldn't take long...
+
+ -- If we postponed saving the archive to the debounce action, we
+ -- would have issues like
+ -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
+ -- where the `q` computation from above (which uses the archive)
+ -- would cause incorrect patch application (before the previous
+ -- archive was saved and applied)
+ newNs' <- archiveSaver $ fst newNs
+
+ pure (newNs', snd newNs)
+
+ -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
+ saveNodeStory
+ --saveNodeStoryImmediate
+ -- Save new ngrams
+ _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
+
+ pure vq'
+
+
+
+-- This is a special case of tableNgramsPut where the input patch is empty.
+tableNgramsPull :: HasNodeStory env err m
=> ListId
-> TableNgrams.NgramsType
-> Version
-> m (Versioned NgramsTablePatch)
tableNgramsPull listId ngramsType p_version = do
- var <- view repoVar
+ printDebug "[tableNgramsPull]" (listId, ngramsType)
+ var <- getNodeStoryVar [listId]
r <- liftBase $ readMVar var
let
- q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
- q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
+ a = r ^. unNodeStory . at listId . _Just
+ q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
+ q_table = q ^. _PatchMap . at ngramsType . _Just
- pure (Versioned (r ^. r_version) q_table)
+ pure (Versioned (a ^. a_version) q_table)
+
+
+
+-- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
-- Apply the given patch to the DB and returns the patch to be applied on the
-- client.
-- TODO-ACCESS check
-tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
- => TabType -> ListId
+tableNgramsPut :: ( HasNodeStory env err m
+ , HasNodeStoryImmediateSaver env
+ , HasNodeArchiveStoryImmediateSaver env
+ , HasInvalidError err
+ , HasSettings env
+ , HasMail env
+ )
+ => TabType
+ -> ListId
-> Versioned NgramsTablePatch
-> m (Versioned NgramsTablePatch)
tableNgramsPut tabType listId (Versioned p_version p_table)
| p_table == mempty = do
+ printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
let ngramsType = ngramsTypeFromTabType tabType
tableNgramsPull listId ngramsType p_version
| otherwise = do
+ printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
let ngramsType = ngramsTypeFromTabType tabType
- (p0, p0_validity) = PM.singleton listId p_table
- (p, p_validity) = PM.singleton ngramsType p0
+ (p, p_validity) = PM.singleton ngramsType p_table
- assertValid p0_validity
assertValid p_validity
- var <- view repoVar
- vq' <- liftBase $ modifyMVar var $ \r -> do
- let
- q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
- (p', q') = transformWith ngramsStatePatchConflictResolution p q
- r' = r & r_version +~ 1
- & r_state %~ act p'
- & r_history %~ (p' :)
- q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
- {-
- -- Ideally we would like to check these properties. However:
- -- * They should be checked only to debug the code. The client data
- -- should be able to trigger these.
- -- * What kind of error should they throw (we are in IO here)?
- -- * Should we keep modifyMVar?
- -- * Should we throw the validation in an Exception, catch it around
- -- modifyMVar and throw it back as an Error?
- assertValid $ transformable p q
- assertValid $ applicable p' (r ^. r_state)
- -}
- pure (r', Versioned (r' ^. r_version) q'_table)
-
- saveRepo
- pure vq'
-
-mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
-mergeNgramsElement _neOld neNew = neNew
+ ret <- commitStatePatch listId (Versioned p_version p)
+ <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
+
+ pure ret
+
+
+
+tableNgramsPostChartsAsync :: ( HasNodeStory env err m
+ , FlowCmdM env err m
+ , HasNodeError err
+ , HasSettings env
+ )
+ => UpdateTableNgramsCharts
+ -> (JobLog -> m ())
+ -> m JobLog
+tableNgramsPostChartsAsync utn logStatus = do
+ let tabType = utn ^. utn_tab_type
+ let listId = utn ^. utn_list_id
+
+ node <- getNode listId
+ let nId = node ^. node_id
+ _uId = node ^. node_user_id
+ mCId = node ^. node_parent_id
+
+ -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
+ -- printDebug "[tableNgramsPostChartsAsync] listId" listId
+
+ case mCId of
+ Nothing -> do
+ printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
+ pure $ jobLogFail $ jobLogInit 1
+ Just cId -> do
+ case tabType of
+ Authors -> do
+ -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
+ (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
+ logRef
+ _ <- Metrics.updatePie cId (Just listId) tabType Nothing
+ logRefSuccess
+
+ getRef
+ Institutes -> do
+ -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
+ -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
+ (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
+ logRef
+ _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
+ -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
+ logRefSuccess
+ _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
+ -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
+ logRefSuccess
+ _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
+ logRefSuccess
+
+ getRef
+ Sources -> do
+ -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
+ (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
+ logRef
+ _ <- Metrics.updatePie cId (Just listId) tabType Nothing
+ logRefSuccess
+
+ getRef
+ Terms -> do
+ -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
+ (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
+ logRef
+{-
+ _ <- Metrics.updateChart cId (Just listId) tabType Nothing
+ logRefSuccess
+ _ <- Metrics.updatePie cId (Just listId) tabType Nothing
+ logRefSuccess
+ _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
+ logRefSuccess
+ _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
+ logRefSuccess
+ _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
+ logRefSuccess
+ _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
+-}
+ logRefSuccess
+
+ getRef
+ _ -> do
+ printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
+ pure $ jobLogFail $ jobLogInit 1
+
{-
{ _ne_list :: ListType
If we merge the parents/children we can potentially create cycles!
}
-}
-getNgramsTableMap :: RepoCmdM env err m
- => ListId
+getNgramsTableMap :: HasNodeStory env err m
+ => NodeId
-> TableNgrams.NgramsType
-> m (Versioned NgramsTableMap)
getNgramsTableMap nodeId ngramsType = do
- v <- view repoVar
+ v <- getNodeStoryVar [nodeId]
repo <- liftBase $ readMVar v
- pure $ Versioned (repo ^. r_version)
- (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
+ pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
+ (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
+
+
+dumpJsonTableMap :: HasNodeStory env err m
+ => Text
+ -> NodeId
+ -> TableNgrams.NgramsType
+ -> m ()
+dumpJsonTableMap fpath nodeId ngramsType = do
+ m <- getNgramsTableMap nodeId ngramsType
+ liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
+ pure ()
+
type MinSize = Int
type MaxSize = Int
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId
-getTime' :: MonadBase IO m => m TimeSpec
-getTime' = liftBase $ getTime ProcessCPUTime
-
getTableNgrams :: forall env err m.
- (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
+ (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
=> NodeType -> NodeId -> TabType
- -> ListId -> Limit -> Maybe Offset
+ -> ListId -> Limit -> Maybe Offset
-> Maybe ListType
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> (NgramsTerm -> Bool)
- -> m (Versioned NgramsTable)
+ -> m (VersionedWithCount NgramsTable)
getTableNgrams _nType nId tabType listId limit_ offset
listType minSize maxSize orderBy searchQuery = do
- t0 <- getTime'
+ t0 <- getTime
-- lIds <- selectNodesWithUsername NodeList userMaster
let
ngramsType = ngramsTypeFromTabType tabType
minSize' = maybe (const True) (<=) minSize
maxSize' = maybe (const True) (>=) maxSize
+ rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
+ (tableMap ^. at r)
+ )
+ (ne ^. ne_root)
+
selected_node n = minSize' s
&& maxSize' s
&& searchQuery (n ^. ne_ngrams)
selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
---------------------------------------
- sortOnOrder Nothing = identity
+ sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
- sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
- sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
+ sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to List.nub . to length)
+ sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to List.nub . to length)
---------------------------------------
- selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
- selectAndPaginate tableMap = roots <> inners
+ -- | Filter the given `tableMap` with the search criteria.
+ filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
+ filteredNodes tableMap = roots
where
list = tableMap ^.. each
- rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
- (ne ^. ne_root)
- selected_nodes = list & take limit_
- . drop offset'
- . filter selected_node
- . sortOnOrder orderBy
- roots = rootOf <$> selected_nodes
- rootsSet = Set.fromList (_ne_ngrams <$> roots)
- inners = list & filter (selected_inner rootsSet)
+ selected_nodes = list & filter selected_node
+ roots = rootOf tableMap <$> selected_nodes
- ---------------------------------------
- setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
- setScores False table = pure table
- setScores True table = do
- let ngrams_terms = (table ^.. each . ne_ngrams)
- t1 <- getTime'
- occurrences <- getOccByNgramsOnlyFast' nId
- listId
- ngramsType
- ngrams_terms
- t2 <- getTime'
- liftBase $ hprint stderr
- ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
- (length ngrams_terms) t1 t2
- {-
- occurrences <- getOccByNgramsOnlySlow nType nId
- (lIds <> [listId])
- ngramsType
- ngrams_terms
- -}
- let
- setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
+ -- | Appends subitems (selected from `tableMap`) for given `roots`.
+ withInners :: Map NgramsTerm NgramsElement -> [NgramsElement] -> [NgramsElement]
+ withInners tableMap roots = roots <> inners
+ where
+ list = tableMap ^.. each
+ rootSet = Set.fromList (_ne_ngrams <$> roots)
+ inners = list & filter (selected_inner rootSet)
- pure $ table & each %~ setOcc
- ---------------------------------------
+ -- | Paginate the results
+ sortAndPaginate :: [NgramsElement] -> [NgramsElement]
+ sortAndPaginate = take limit_
+ . drop offset'
+ . sortOnOrder orderBy
- -- lists <- catMaybes <$> listsWith userMaster
- -- trace (show lists) $
- -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
+ ---------------------------------------
let scoresNeeded = needsScores orderBy
- tableMap1 <- getNgramsTableMap listId ngramsType
- t1 <- getTime'
- tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
- . Map.mapWithKey ngramsElementFromRepo
- t2 <- getTime'
- tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
- . setScores (not scoresNeeded)
- . selectAndPaginate
- t3 <- getTime'
- liftBase $ hprint stderr
- ("getTableNgrams total=" % timeSpecs
- % " map1=" % timeSpecs
- % " map2=" % timeSpecs
- % " map3=" % timeSpecs
- % " sql=" % (if scoresNeeded then "map2" else "map3")
- % "\n"
- ) t0 t3 t0 t1 t1 t2 t2 t3
- pure tableMap3
-
-
-scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env) => NodeId -> TabType -> ListId -> m Int
+ t1 <- getTime
+
+ tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
+
+ let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
+
+ let fltrCount = length $ fltr ^. v_data . _NgramsTable
+
+ t2 <- getTime
+ let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
+ t3 <- getTime
+ --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
+ liftBase $ do
+ hprint stderr
+ ("getTableNgrams total=" % hasTime
+ % " map1=" % hasTime
+ % " map2=" % hasTime
+ % " map3=" % hasTime
+ % " sql=" % (if scoresNeeded then "map2" else "map3")
+ % "\n"
+ ) t0 t3 t0 t1 t1 t2 t2 t3
+
+ -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
+ pure $ toVersionedWithCount fltrCount tableMapSorted
+
+
+-- | Helper function to get the ngrams table with scores.
+getNgramsTable' :: forall env err m.
+ ( HasNodeStory env err m
+ , HasNodeError err
+ , HasConnectionPool env
+ , HasConfig env
+ , HasMail env)
+ => NodeId
+ -> ListId
+ -> TableNgrams.NgramsType
+ -> m (Versioned (Map.Map NgramsTerm NgramsElement))
+getNgramsTable' nId listId ngramsType = do
+ tableMap <- getNgramsTableMap listId ngramsType
+ tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
+ . Map.mapWithKey ngramsElementFromRepo
+
+-- | Helper function to set scores on an `NgramsTable`.
+setNgramsTableScores :: forall env err m t.
+ ( Each t t NgramsElement NgramsElement
+ , HasNodeStory env err m
+ , HasNodeError err
+ , HasConnectionPool env
+ , HasConfig env
+ , HasMail env)
+ => NodeId
+ -> ListId
+ -> TableNgrams.NgramsType
+ -> t
+ -> m t
+setNgramsTableScores nId listId ngramsType table = do
+ t1 <- getTime
+ occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
+ --printDebug "[setNgramsTableScores] occurrences" occurrences
+ t2 <- getTime
+ liftBase $ do
+ let ngrams_terms = table ^.. each . ne_ngrams
+ -- printDebug "ngrams_terms" ngrams_terms
+ hprint stderr
+ ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
+ (length ngrams_terms) t1 t2
+ let
+ setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
+
+ --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
+
+ pure $ table & each %~ setOcc
+
+
+
+
+scoresRecomputeTableNgrams :: forall env err m.
+ (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
+ => NodeId -> TabType -> ListId -> m Int
scoresRecomputeTableNgrams nId tabType listId = do
tableMap <- getNgramsTableMap listId ngramsType
- _ <- tableMap & v_data %%~ setScores
+ _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
. Map.mapWithKey ngramsElementFromRepo
pure $ 1
where
ngramsType = ngramsTypeFromTabType tabType
- setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
- setScores table = do
- let ngrams_terms = (table ^.. each . ne_ngrams)
- occurrences <- getOccByNgramsOnlyFast' nId
- listId
- ngramsType
- ngrams_terms
- let
- setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
-
- pure $ table & each %~ setOcc
-
-
-- APIs
-- TODO: find a better place for the code above, All APIs stay here
-type QueryParamR = QueryParam' '[Required, Strict]
data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
deriving (Generic, Enum, Bounded, Read, Show)
parseUrlPiece "ScoreDesc" = pure ScoreDesc
parseUrlPiece _ = Left "Unexpected value of OrderBy"
+instance ToHttpApiData OrderBy where
+ toUrlPiece = pack . show
instance ToParamSchema OrderBy
instance FromJSON OrderBy
:> QueryParam "maxTermSize" MaxSize
:> QueryParam "orderBy" OrderBy
:> QueryParam "search" Text
- :> Get '[JSON] (Versioned NgramsTable)
+ :> Get '[JSON] (VersionedWithCount NgramsTable)
type TableNgramsApiPut = Summary " Table Ngrams API Change"
:> QueryParamR "ngramsType" TabType
:> ReqBody '[JSON] (Versioned NgramsTablePatch)
:> Put '[JSON] (Versioned NgramsTablePatch)
-type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
- :> QueryParamR "ngramsType" TabType
- :> QueryParamR "list" ListId
- :> QueryParam "listType" ListType
- :> ReqBody '[JSON] [NgramsTerm]
- :> Post '[JSON] ()
-
type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
:> QueryParamR "ngramsType" TabType
:> QueryParamR "list" ListId
:> "recompute" :> Post '[JSON] Int
+type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
+ :> QueryParamR "ngramsType" TabType
+ :> QueryParamR "list" ListId
+ :> Get '[JSON] Version
+
type TableNgramsApi = TableNgramsApiGet
:<|> TableNgramsApiPut
- :<|> TableNgramsApiPost
:<|> RecomputeScoresNgramsApiGet
-
-getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
- => NodeId -> TabType
- -> ListId -> Limit -> Maybe Offset
+ :<|> "version" :> TableNgramsApiGetVersion
+ :<|> TableNgramsAsyncApi
+
+type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
+ :> "async"
+ :> "charts"
+ :> "update"
+ :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
+
+getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
+ => NodeId
+ -> TabType
+ -> ListId
+ -> Limit
+ -> Maybe Offset
-> Maybe ListType
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> Maybe Text -- full text search
- -> m (Versioned NgramsTable)
+ -> m (VersionedWithCount NgramsTable)
getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
where
- searchQuery = maybe (const True) isInfixOf mt
+ searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
+
+
+
+getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
+ => NodeId
+ -> TabType
+ -> ListId
+ -> m Version
+getTableNgramsVersion _nId _tabType listId = currentVersion listId
+
+
+
+ -- TODO: limit?
+ -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
+ -- This line above looks like a waste of computation to finally get only the version.
+ -- See the comment about listNgramsChangedSince.
+
-- | Text search is deactivated for now for ngrams by doc only
-getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
+getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
=> DocId -> TabType
-> ListId -> Limit -> Maybe Offset
-> Maybe ListType
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> Maybe Text -- full text search
- -> m (Versioned NgramsTable)
+ -> m (VersionedWithCount NgramsTable)
getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
ns <- selectNodesWithUsername NodeList userMaster
let ngramsType = ngramsTypeFromTabType tabType
ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
- let searchQuery = flip S.member (S.fromList ngs)
+ let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
-apiNgramsTableCorpus :: ( RepoCmdM env err m
- , HasNodeError err
- , HasInvalidError err
- , HasConnectionPool env
- )
- => NodeId -> ServerT TableNgramsApi m
-apiNgramsTableCorpus cId = getTableNgramsCorpus cId
+apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
+apiNgramsTableCorpus cId = getTableNgramsCorpus cId
:<|> tableNgramsPut
- :<|> tableNgramsPost
:<|> scoresRecomputeTableNgrams cId
+ :<|> getTableNgramsVersion cId
+ :<|> apiNgramsAsync cId
-apiNgramsTableDoc :: ( RepoCmdM env err m
- , HasNodeError err
- , HasInvalidError err
- , HasConnectionPool env
- )
- => DocId -> ServerT TableNgramsApi m
-apiNgramsTableDoc dId = getTableNgramsDoc dId
+apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
+apiNgramsTableDoc dId = getTableNgramsDoc dId
:<|> tableNgramsPut
- :<|> tableNgramsPost
:<|> scoresRecomputeTableNgrams dId
- -- > add new ngrams in database (TODO AD)
- -- > index all the corpus accordingly (TODO AD)
+ :<|> getTableNgramsVersion dId
+ :<|> apiNgramsAsync dId
-listNgramsChangedSince :: RepoCmdM env err m
+apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
+apiNgramsAsync _dId =
+ serveJobsAPI TableNgramsJob $ \i log ->
+ let
+ log' x = do
+ printDebug "tableNgramsPostChartsAsync" x
+ liftBase $ log x
+ in tableNgramsPostChartsAsync i log'
+
+-- Did the given list of ngrams changed since the given version?
+-- The returned value is versioned boolean value, meaning that one always retrieve the
+-- latest version.
+-- If the given version is negative then one simply receive the latest version and True.
+-- Using this function is more precise than simply comparing the latest version number
+-- with the local version number. Indeed there might be no change to this particular list
+-- and still the version number has changed because of other lists.
+--
+-- Here the added value is to make a compromise between precision, computation, and bandwidth:
+-- * currentVersion: good computation, good bandwidth, bad precision.
+-- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
+-- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
+listNgramsChangedSince :: HasNodeStory env err m
=> ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
listNgramsChangedSince listId ngramsType version
| version < 0 =
- Versioned <$> currentVersion <*> pure True
+ Versioned <$> currentVersion listId <*> pure True
| otherwise =
tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
-
--- Instances
-instance Arbitrary NgramsRepoElement where
- arbitrary = elements $ map ngramsElementToRepo ns
- where
- NgramsTable ns = mockTable
-
---{-
-instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
- where
- parseUrlPiece x = maybeToEither x (decode $ cs x)