Ngrams API
-- | TODO
--- get data of NgramsTable
--- post :: update NodeNodeNgrams
--- group ngrams
-
get ngrams filtered by NgramsType
add get
-}
-{-# LANGUAGE DataKinds #-}
-{-# LANGUAGE DeriveGeneric #-}
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE OverloadedStrings #-}
+{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
+
+{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# OPTIONS -fno-warn-orphans #-}
+{-# LANGUAGE TypeFamilies #-}
module Gargantext.API.Ngrams
+ ( TableNgramsApi
+ , TableNgramsApiGet
+ , TableNgramsApiPut
+
+ , getTableNgrams
+ , setListNgrams
+ --, rmListNgrams TODO fix before exporting
+ , apiNgramsTableCorpus
+ , apiNgramsTableDoc
+
+ , NgramsStatePatch
+ , NgramsTablePatch
+ , NgramsTableMap
+
+ , NgramsTerm(..)
+
+ , NgramsElement(..)
+ , mkNgramsElement
+
+ , RootParent(..)
+
+ , MSet
+ , mSetFromList
+ , mSetToList
+
+ , Repo(..)
+ , r_version
+ , r_state
+ , r_history
+ , NgramsRepo
+ , NgramsRepoElement(..)
+ , saveRepo
+ , initRepo
+
+ , RepoEnv(..)
+ , renv_var
+ , renv_lock
+
+ , TabType(..)
+
+ , HasRepoVar(..)
+ , HasRepoSaver(..)
+ , HasRepo(..)
+ , RepoCmdM
+ , QueryParamR
+ , TODO
+
+ -- Internals
+ , getNgramsTableMap
+ , dumpJsonTableMap
+ , tableNgramsPull
+ , tableNgramsPut
+
+ , Version
+ , Versioned(..)
+ , VersionedWithCount(..)
+ , currentVersion
+ , listNgramsChangedSince
+ )
where
-import Prelude (round)
--- import Gargantext.Database.User (UserId)
-import Data.Patch.Class (Replace, replace)
---import qualified Data.Map.Strict.Patch as PM
+import Control.Concurrent
+import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
+import Control.Monad.Reader
+import Data.Aeson hiding ((.=))
+import Data.Either (Either(..))
+import Data.Foldable
+import Data.Map.Strict (Map)
+import Data.Maybe (fromMaybe)
import Data.Monoid
---import Data.Semigroup
-import Data.Set (Set)
-import qualified Data.Set as Set
---import Data.Maybe (catMaybes)
---import qualified Data.Map.Strict as DM
---import qualified Data.Set as Set
-import Control.Lens (view, (.~))
-import Data.Aeson
-import Data.Aeson.TH (deriveJSON)
-import Data.Either(Either(Left))
-import Data.Map (lookup)
-import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
-import Data.Swagger
-import Data.Text (Text)
-import Database.PostgreSQL.Simple (Connection)
+import Data.Ord (Down(..))
+import Data.Patch.Class (Action(act), Transformable(..), ours)
+import Data.Swagger hiding (version, patch)
+import Data.Text (Text, isInfixOf, unpack)
+import Data.Text.Lazy.IO as DTL
+import Formatting (hprint, int, (%))
import GHC.Generics (Generic)
-import Gargantext.Core.Types (node_id)
---import Gargantext.Core.Types.Main (Tree(..))
-import Gargantext.Core.Utils.Prefix (unPrefix)
-import Gargantext.Database.Types.Node (NodeType(..))
-import Gargantext.Database.Node (getListsWithParentId)
-import qualified Gargantext.Database.Ngrams as Ngrams
-import Gargantext.Prelude
-import Gargantext.Core.Types (ListType(..), ListId)
-import Prelude (Enum, Bounded, minBound, maxBound)
+import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
+import Gargantext.API.Admin.Types (HasSettings)
+import Gargantext.API.Ngrams.Types
+import Gargantext.API.Prelude
+import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
+import Gargantext.Core.Utils (something)
+import Gargantext.Database.Action.Flow.Types
+import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
+import Gargantext.Database.Admin.Config (userMaster)
+import Gargantext.Database.Admin.Types.Node (NodeType(..))
+import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
+import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
+import Gargantext.Database.Query.Table.Node (getNode)
+import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
+import Gargantext.Database.Query.Table.Node.Select
+import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
+import Gargantext.Prelude hiding (log)
+import Gargantext.Prelude.Job
+import Gargantext.Prelude.Clock (hasTime, getTime)
+import Prelude (error)
import Servant hiding (Patch)
+import Servant.Job.Async (JobFunction(..), serveJobsAPI)
+import System.IO (stderr)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
-
-------------------------------------------------------------------------
---data FacetFormat = Table | Chart
-data TabType = Docs | Terms | Sources | Authors | Institutes | Trash
- | Contacts
- deriving (Generic, Enum, Bounded)
-
-instance FromHttpApiData TabType
- where
- parseUrlPiece "Docs" = pure Docs
- parseUrlPiece "Terms" = pure Terms
- parseUrlPiece "Sources" = pure Sources
- parseUrlPiece "Institutes" = pure Institutes
- parseUrlPiece "Authors" = pure Authors
- parseUrlPiece "Trash" = pure Trash
-
- parseUrlPiece "Contacts" = pure Contacts
-
- parseUrlPiece _ = Left "Unexpected value of TabType"
-
-instance ToParamSchema TabType
-instance ToJSON TabType
-instance FromJSON TabType
-instance ToSchema TabType
-instance Arbitrary TabType
- where
- arbitrary = elements [minBound .. maxBound]
-
-------------------------------------------------------------------------
-type NgramsTerm = Text
-
-data NgramsElement =
- NgramsElement { _ne_ngrams :: NgramsTerm
- , _ne_list :: ListType
- , _ne_occurrences :: Int
- , _ne_parent :: Maybe NgramsTerm
- , _ne_children :: Set NgramsTerm
- }
- deriving (Ord, Eq, Show, Generic)
-$(deriveJSON (unPrefix "_ne_") ''NgramsElement)
-
-instance ToSchema NgramsElement
-instance Arbitrary NgramsElement where
- arbitrary = elements [NgramsElement "sport" GraphList 1 Nothing mempty]
-
-------------------------------------------------------------------------
-newtype NgramsTable = NgramsTable { _ngramsTable :: [NgramsElement] }
- deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
-
-instance Arbitrary NgramsTable where
- arbitrary = elements
- [ NgramsTable
- [ NgramsElement "animal" GraphList 1 Nothing (Set.fromList ["dog", "cat"])
- , NgramsElement "cat" GraphList 1 (Just "animal") mempty
- , NgramsElement "cats" StopList 4 Nothing mempty
- , NgramsElement "dog" GraphList 3 (Just "animal")(Set.fromList ["dogs"])
- , NgramsElement "dogs" StopList 4 (Just "dog") mempty
- , NgramsElement "fox" GraphList 1 Nothing mempty
- , NgramsElement "object" CandidateList 2 Nothing mempty
- , NgramsElement "nothing" StopList 4 Nothing mempty
- , NgramsElement "organic" GraphList 3 Nothing (Set.singleton "flower")
- , NgramsElement "flower" GraphList 3 (Just "organic") mempty
- , NgramsElement "moon" CandidateList 1 Nothing mempty
- , NgramsElement "sky" StopList 1 Nothing mempty
- ]
- ]
-instance ToSchema NgramsTable
-
-------------------------------------------------------------------------
--- On the Client side:
---data Action = InGroup NgramsId NgramsId
--- | OutGroup NgramsId NgramsId
--- | SetListType NgramsId ListType
-
-data PatchSet a = PatchSet
- { _rem :: Set a
- , _add :: Set a
- }
- deriving (Eq, Ord, Show, Generic)
-
-instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
- arbitrary = PatchSet <$> arbitrary <*> arbitrary
-
-instance ToJSON a => ToJSON (PatchSet a) where
- toJSON = genericToJSON $ unPrefix "_"
- toEncoding = genericToEncoding $ unPrefix "_"
-
-instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
- parseJSON = genericParseJSON $ unPrefix "_"
-
-instance ToSchema a => ToSchema (PatchSet a)
-
-instance ToSchema a => ToSchema (Replace a) where
- declareNamedSchema (_ :: proxy (Replace a)) = do
- aSchema <- declareSchemaRef (Proxy :: Proxy a)
- return $ NamedSchema (Just "Replace") $ mempty
- & type_ .~ SwaggerObject
- & properties .~
- InsOrdHashMap.fromList
- [ ("old", aSchema)
- , ("new", aSchema)
- ]
- & required .~ [ "old", "new" ]
-
-data NgramsPatch =
- NgramsPatch { _patch_children :: PatchSet NgramsElement
- , _patch_list :: Replace ListType -- TODO Map UserId ListType
- }
- deriving (Ord, Eq, Show, Generic)
-$(deriveJSON (unPrefix "_") ''NgramsPatch)
-
--- instance Semigroup NgramsPatch where
-
-instance ToSchema NgramsPatch
-
-instance Arbitrary NgramsPatch where
- arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
-
-data NgramsIdPatch =
- NgramsIdPatch { _nip_ngrams :: NgramsTerm
- , _nip_ngramsPatch :: NgramsPatch
- }
- deriving (Ord, Eq, Show, Generic)
-$(deriveJSON (unPrefix "_nip_") ''NgramsIdPatch)
-
-instance ToSchema NgramsIdPatch
-
-instance Arbitrary NgramsIdPatch where
- arbitrary = NgramsIdPatch <$> arbitrary <*> arbitrary
-
- --
--- TODO:
--- * This should be a Map NgramsId NgramsPatch
--- * Patchs -> Patches
-newtype NgramsIdPatchs =
- NgramsIdPatchs { _nip_ngramsIdPatchs :: [NgramsIdPatch] }
- deriving (Ord, Eq, Show, Generic, Arbitrary)
-$(deriveJSON (unPrefix "_nip_") ''NgramsIdPatchs)
-instance ToSchema NgramsIdPatchs
-
-------------------------------------------------------------------------
-------------------------------------------------------------------------
-type Version = Int
-
-data Versioned a = Versioned
- { _v_version :: Version
- , _v_data :: a
- }
+import qualified Data.Aeson.Text as DAT
+import qualified Data.List as List
+import qualified Data.Map.Strict as Map
+import qualified Data.Map.Strict.Patch as PM
+import qualified Data.Set as S
+import qualified Data.Set as Set
+import qualified Gargantext.API.Metrics as Metrics
+import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
{-
--- TODO sequencs of modifications (Patchs)
+-- TODO sequences of modifications (Patchs)
type NgramsIdPatch = Patch NgramsId NgramsPatch
ngramsPatch :: Int -> NgramsPatch
-ngramsPatch n = NgramsPatch (DM.fromList [(1, StopList)]) (Set.fromList [n]) Set.empty
+ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
toEdit n p = Edit n p
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
-type CorpusId = Int
-type TableNgramsApiGet = Summary " Table Ngrams API Get"
- :> QueryParam "ngramsType" TabType
- :> QueryParam "list" ListId
- :> Get '[JSON] NgramsTable
+{-
+-- TODO: Replace.old is ignored which means that if the current list
+-- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
+-- the list is going to be `StopTerm` while it should keep `MapTerm`.
+-- However this should not happen in non conflicting situations.
+mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
+mkListsUpdate nt patches =
+ [ (ngramsTypeId nt, ng, listTypeId lt)
+ | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
+ , lt <- patch ^.. patch_list . new
+ ]
+
+mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
+ -> NgramsType
+ -> NgramsTablePatch
+ -> [(NgramsTypeId, NgramsParent, NgramsChild)]
+mkChildrenGroups addOrRem nt patches =
+ [ (ngramsTypeId nt, parent, child)
+ | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
+ , child <- patch ^.. patch_children . to addOrRem . folded
+ ]
+-}
-type TableNgramsApi = Summary " Table Ngrams API Change"
- :> QueryParam "list" ListId
- :> ReqBody '[JSON] NgramsIdPatchsFeed -- Versioned ...
- :> Put '[JSON] NgramsIdPatchsBack -- Versioned ...
+------------------------------------------------------------------------
-type NgramsIdPatchsFeed = NgramsIdPatchs
-type NgramsIdPatchsBack = NgramsIdPatchs
+saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
+ => m ()
+saveRepo = liftBase =<< view repoSaver
+
+listTypeConflictResolution :: ListType -> ListType -> ListType
+listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
+
+ngramsStatePatchConflictResolution
+ :: TableNgrams.NgramsType
+ -> NodeId
+ -> NgramsTerm
+ -> ConflictResolutionNgramsPatch
+ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
+ = (ours, (const ours, ours), (False, False))
+ -- (False, False) mean here that Mod has always priority.
+ -- (True, False) <- would mean priority to the left (same as ours).
+
+ -- undefined {- TODO think this through -}, listTypeConflictResolution)
+
+-- Current state:
+-- Insertions are not considered as patches,
+-- they do not extend history,
+-- they do not bump version.
+insertNewOnly :: a -> Maybe b -> a
+insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
+ -- TODO error handling
+
+{- unused
+-- TODO refactor with putListNgrams
+copyListNgrams :: RepoCmdM env err m
+ => NodeId -> NodeId -> NgramsType
+ -> m ()
+copyListNgrams srcListId dstListId ngramsType = do
+ var <- view repoVar
+ liftBase $ modifyMVar_ var $
+ pure . (r_state . at ngramsType %~ (Just . f . something))
+ saveRepo
+ where
+ f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
+ f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
+
+-- TODO refactor with putListNgrams
+-- The list must be non-empty!
+-- The added ngrams must be non-existent!
+addListNgrams :: RepoCmdM env err m
+ => NodeId -> NgramsType
+ -> [NgramsElement] -> m ()
+addListNgrams listId ngramsType nes = do
+ var <- view repoVar
+ liftBase $ modifyMVar_ var $
+ pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
+ saveRepo
+ where
+ m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
+-}
+-- UNSAFE
+rmListNgrams :: RepoCmdM env err m
+ => ListId
+ -> TableNgrams.NgramsType
+ -> m ()
+rmListNgrams l nt = setListNgrams l nt mempty
+
+-- | TODO: incr the Version number
+-- && should use patch
+-- UNSAFE
+setListNgrams :: RepoCmdM env err m
+ => NodeId
+ -> TableNgrams.NgramsType
+ -> Map NgramsTerm NgramsRepoElement
+ -> m ()
+setListNgrams listId ngramsType ns = do
+ var <- view repoVar
+ liftBase $ modifyMVar_ var $
+ pure . ( r_state
+ . at ngramsType %~
+ (Just .
+ (at listId .~ ( Just ns))
+ . something
+ )
+ )
+ saveRepo
+
+
+currentVersion :: RepoCmdM env err m
+ => m Version
+currentVersion = do
+ var <- view repoVar
+ r <- liftBase $ readMVar var
+ pure $ r ^. r_version
+
+newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
+newNgramsFromNgramsStatePatch p =
+ [ text2ngrams (unNgramsTerm n)
+ | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
+ , _ <- np ^.. patch_new . _Just
+ ]
+
+-- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
+commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
+commitStatePatch (Versioned p_version p) = do
+ var <- view repoVar
+ vq' <- liftBase $ modifyMVar var $ \r -> do
+ let
+ q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
+ (p', q') = transformWith ngramsStatePatchConflictResolution p q
+ r' = r & r_version +~ 1
+ & r_state %~ act p'
+ & r_history %~ (p' :)
+ {-
+ -- Ideally we would like to check these properties. However:
+ -- * They should be checked only to debug the code. The client data
+ -- should be able to trigger these.
+ -- * What kind of error should they throw (we are in IO here)?
+ -- * Should we keep modifyMVar?
+ -- * Should we throw the validation in an Exception, catch it around
+ -- modifyMVar and throw it back as an Error?
+ assertValid $ transformable p q
+ assertValid $ applicable p' (r ^. r_state)
+ -}
+ pure (r', Versioned (r' ^. r_version) q')
+
+ saveRepo
+
+ -- Save new ngrams
+ _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
+
+ pure vq'
+
+-- This is a special case of tableNgramsPut where the input patch is empty.
+tableNgramsPull :: RepoCmdM env err m
+ => ListId
+ -> TableNgrams.NgramsType
+ -> Version
+ -> m (Versioned NgramsTablePatch)
+tableNgramsPull listId ngramsType p_version = do
+ var <- view repoVar
+ r <- liftBase $ readMVar var
+
+ let
+ q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
+ q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
+
+ pure (Versioned (r ^. r_version) q_table)
+
+-- Apply the given patch to the DB and returns the patch to be applied on the
+-- client.
+-- TODO-ACCESS check
+tableNgramsPut :: ( FlowCmdM env err m
+ , HasSettings env
+ )
+ => TabType
+ -> ListId
+ -> Versioned NgramsTablePatch
+ -> m (Versioned NgramsTablePatch)
+tableNgramsPut tabType listId (Versioned p_version p_table)
+ | p_table == mempty = do
+ let ngramsType = ngramsTypeFromTabType tabType
+ tableNgramsPull listId ngramsType p_version
+
+ | otherwise = do
+ let ngramsType = ngramsTypeFromTabType tabType
+ (p0, p0_validity) = PM.singleton listId p_table
+ (p, p_validity) = PM.singleton ngramsType p0
+
+ assertValid p0_validity
+ assertValid p_validity
+
+ ret <- commitStatePatch (Versioned p_version p)
+ <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
+
+ pure ret
+
+
+tableNgramsPostChartsAsync :: ( FlowCmdM env err m
+ , HasNodeError err
+ , HasSettings env
+ )
+ => UpdateTableNgramsCharts
+ -> (JobLog -> m ())
+ -> m JobLog
+tableNgramsPostChartsAsync utn logStatus = do
+ let tabType = utn ^. utn_tab_type
+ let listId = utn ^. utn_list_id
+
+ node <- getNode listId
+ let nId = node ^. node_id
+ _uId = node ^. node_user_id
+ mCId = node ^. node_parent_id
+
+ printDebug "[tableNgramsPut] tabType" tabType
+ printDebug "[tableNgramsPut] listId" listId
+
+ case mCId of
+ Nothing -> do
+ printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
+ pure $ jobLogFail $ jobLogInit 1
+ Just cId -> do
+ case tabType of
+ Authors -> do
+ -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
+ (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
+ logRef
+ _ <- Metrics.updatePie cId (Just listId) tabType Nothing
+ logRefSuccess
+
+ getRef
+ Institutes -> do
+ -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
+ -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
+ (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
+ logRef
+ _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
+ -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
+ logRefSuccess
+ _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
+ -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
+ logRefSuccess
+ _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
+ logRefSuccess
+
+ getRef
+ Sources -> do
+ -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
+ (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
+ logRef
+ _ <- Metrics.updatePie cId (Just listId) tabType Nothing
+ logRefSuccess
+
+ getRef
+ Terms -> do
+ -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
+ (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
+ logRef
+{-
+ _ <- Metrics.updateChart cId (Just listId) tabType Nothing
+ logRefSuccess
+ _ <- Metrics.updatePie cId (Just listId) tabType Nothing
+ logRefSuccess
+ _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
+ logRefSuccess
+ _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
+ logRefSuccess
+ _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
+ logRefSuccess
+ _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
+-}
+ logRefSuccess
+
+ getRef
+ _ -> do
+ printDebug "[tableNgramsPut] no update for tabType = " tabType
+ pure $ jobLogFail $ jobLogInit 1
+
+ {-
+ { _ne_list :: ListType
+ If we merge the parents/children we can potentially create cycles!
+ , _ne_parent :: Maybe NgramsTerm
+ , _ne_children :: MSet NgramsTerm
+ }
+ -}
+
+getNgramsTableMap :: RepoCmdM env err m
+ => NodeId
+ -> TableNgrams.NgramsType
+ -> m (Versioned NgramsTableMap)
+getNgramsTableMap nodeId ngramsType = do
+ v <- view repoVar
+ repo <- liftBase $ readMVar v
+ pure $ Versioned (repo ^. r_version)
+ (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
+
+dumpJsonTableMap :: RepoCmdM env err m
+ => Text
+ -> NodeId
+ -> TableNgrams.NgramsType
+ -> m ()
+dumpJsonTableMap fpath nodeId ngramsType = do
+ m <- getNgramsTableMap nodeId ngramsType
+ liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
+ pure ()
+
+type MinSize = Int
+type MaxSize = Int
-defaultList :: Connection -> CorpusId -> IO ListId
-defaultList c cId = view node_id <$> maybe (panic noListFound) identity
- <$> head
- <$> getListsWithParentId c cId
+-- | TODO Errors management
+-- TODO: polymorphic for Annuaire or Corpus or ...
+-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
+-- TODO: should take only one ListId
+
+
+getTableNgrams :: forall env err m.
+ (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
+ => NodeType -> NodeId -> TabType
+ -> ListId -> Limit -> Maybe Offset
+ -> Maybe ListType
+ -> Maybe MinSize -> Maybe MaxSize
+ -> Maybe OrderBy
+ -> (NgramsTerm -> Bool)
+ -> m (VersionedWithCount NgramsTable)
+getTableNgrams _nType nId tabType listId limit_ offset
+ listType minSize maxSize orderBy searchQuery = do
+
+ t0 <- getTime
+ -- lIds <- selectNodesWithUsername NodeList userMaster
+ let
+ ngramsType = ngramsTypeFromTabType tabType
+ offset' = maybe 0 identity offset
+ listType' = maybe (const True) (==) listType
+ minSize' = maybe (const True) (<=) minSize
+ maxSize' = maybe (const True) (>=) maxSize
+
+ selected_node n = minSize' s
+ && maxSize' s
+ && searchQuery (n ^. ne_ngrams)
+ && listType' (n ^. ne_list)
+ where
+ s = n ^. ne_size
+
+ selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
+
+ ---------------------------------------
+ sortOnOrder Nothing = identity
+ sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
+ sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
+ sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
+ sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
+
+ ---------------------------------------
+
+ filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
+ filteredNodes tableMap = rootOf <$> list & filter selected_node
+ where
+ rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
+ (tableMap ^. at r)
+ )
+ (ne ^. ne_root)
+ list = tableMap ^.. each
+
+ ---------------------------------------
+ selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
+ selectAndPaginate tableMap = roots <> inners
+ where
+ list = tableMap ^.. each
+ rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
+ (tableMap ^. at r)
+ )
+ (ne ^. ne_root)
+ selected_nodes = list & take limit_
+ . drop offset'
+ . filter selected_node
+ . sortOnOrder orderBy
+ roots = rootOf <$> selected_nodes
+ rootsSet = Set.fromList (_ne_ngrams <$> roots)
+ inners = list & filter (selected_inner rootsSet)
+
+ ---------------------------------------
+ setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
+ setScores False table = pure table
+ setScores True table = do
+ let ngrams_terms = table ^.. each . ne_ngrams
+ t1 <- getTime
+ occurrences <- getOccByNgramsOnlyFast' nId
+ listId
+ ngramsType
+ ngrams_terms
+ t2 <- getTime
+ liftBase $ hprint stderr
+ ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
+ (length ngrams_terms) t1 t2
+ {-
+ occurrences <- getOccByNgramsOnlySlow nType nId
+ (lIds <> [listId])
+ ngramsType
+ ngrams_terms
+ -}
+ let
+ setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
+
+ pure $ table & each %~ setOcc
+ ---------------------------------------
+
+ -- lists <- catMaybes <$> listsWith userMaster
+ -- trace (show lists) $
+ -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
+
+
+ let scoresNeeded = needsScores orderBy
+ tableMap1 <- getNgramsTableMap listId ngramsType
+ t1 <- getTime
+ tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
+ . Map.mapWithKey ngramsElementFromRepo
+
+ fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
+ . filteredNodes
+ let fltrCount = length $ fltr ^. v_data . _NgramsTable
+
+ t2 <- getTime
+ tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
+ . setScores (not scoresNeeded)
+ . selectAndPaginate
+ t3 <- getTime
+ liftBase $ hprint stderr
+ ("getTableNgrams total=" % hasTime
+ % " map1=" % hasTime
+ % " map2=" % hasTime
+ % " map3=" % hasTime
+ % " sql=" % (if scoresNeeded then "map2" else "map3")
+ % "\n"
+ ) t0 t3 t0 t1 t1 t2 t2 t3
+ pure $ toVersionedWithCount fltrCount tableMap3
+
+
+scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
+scoresRecomputeTableNgrams nId tabType listId = do
+ tableMap <- getNgramsTableMap listId ngramsType
+ _ <- tableMap & v_data %%~ setScores
+ . Map.mapWithKey ngramsElementFromRepo
+
+ pure $ 1
where
- noListFound = "Gargantext.API.Ngrams.defaultList: no list found"
+ ngramsType = ngramsTypeFromTabType tabType
-{-
-toLists :: ListId -> NgramsIdPatchs -> [(ListId, NgramsId, ListTypeId)]
--- toLists = undefined
-toLists lId np = [ (lId,ngId,listTypeId lt) | map (toList lId) (_nip_ngramsIdPatchs np) ]
+ setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
+ setScores table = do
+ let ngrams_terms = table ^.. each . ne_ngrams
+ occurrences <- getOccByNgramsOnlyFast' nId
+ listId
+ ngramsType
+ ngrams_terms
+ let
+ setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
-toList :: ListId -> NgramsIdPatch -> (ListId, NgramsId, ListTypeId)
-toList = undefined
+ pure $ table & each %~ setOcc
-toGroups :: ListId -> (NgramsPatch -> Set NgramsId) -> NgramsIdPatchs -> [NodeNgramsNgrams]
-toGroups lId addOrRem ps = concat $ map (toGroup lId addOrRem) $ _nip_ngramsIdPatchs ps
-toGroup :: ListId -> (NgramsPatch -> Set NgramsId) -> NgramsIdPatch -> [NodeNgramsNgrams]
--- toGroup = undefined
-toGroup lId addOrRem (NgramsIdPatch ngId patch) =
- map (\ng -> (NodeNgramsNgrams lId ngId ng (Just 1))) (Set.toList $ addOrRem patch)
--}
+-- APIs
+
+-- TODO: find a better place for the code above, All APIs stay here
+
+data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
+ deriving (Generic, Enum, Bounded, Read, Show)
+
+instance FromHttpApiData OrderBy
+ where
+ parseUrlPiece "TermAsc" = pure TermAsc
+ parseUrlPiece "TermDesc" = pure TermDesc
+ parseUrlPiece "ScoreAsc" = pure ScoreAsc
+ parseUrlPiece "ScoreDesc" = pure ScoreDesc
+ parseUrlPiece _ = Left "Unexpected value of OrderBy"
+
+
+instance ToParamSchema OrderBy
+instance FromJSON OrderBy
+instance ToJSON OrderBy
+instance ToSchema OrderBy
+instance Arbitrary OrderBy
+ where
+ arbitrary = elements [minBound..maxBound]
+
+needsScores :: Maybe OrderBy -> Bool
+needsScores (Just ScoreAsc) = True
+needsScores (Just ScoreDesc) = True
+needsScores _ = False
+
+type TableNgramsApiGet = Summary " Table Ngrams API Get"
+ :> QueryParamR "ngramsType" TabType
+ :> QueryParamR "list" ListId
+ :> QueryParamR "limit" Limit
+ :> QueryParam "offset" Offset
+ :> QueryParam "listType" ListType
+ :> QueryParam "minTermSize" MinSize
+ :> QueryParam "maxTermSize" MaxSize
+ :> QueryParam "orderBy" OrderBy
+ :> QueryParam "search" Text
+ :> Get '[JSON] (VersionedWithCount NgramsTable)
+
+type TableNgramsApiPut = Summary " Table Ngrams API Change"
+ :> QueryParamR "ngramsType" TabType
+ :> QueryParamR "list" ListId
+ :> ReqBody '[JSON] (Versioned NgramsTablePatch)
+ :> Put '[JSON] (Versioned NgramsTablePatch)
+
+type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
+ :> QueryParamR "ngramsType" TabType
+ :> QueryParamR "list" ListId
+ :> "recompute" :> Post '[JSON] Int
+
+type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
+ :> QueryParamR "ngramsType" TabType
+ :> QueryParamR "list" ListId
+ :> Get '[JSON] Version
+
+type TableNgramsApi = TableNgramsApiGet
+ :<|> TableNgramsApiPut
+ :<|> RecomputeScoresNgramsApiGet
+ :<|> "version" :> TableNgramsApiGetVersion
+ :<|> TableNgramsAsyncApi
+
+type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
+ :> "async"
+ :> "charts"
+ :> "update"
+ :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
+
+getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
+ => NodeId
+ -> TabType
+ -> ListId
+ -> Limit
+ -> Maybe Offset
+ -> Maybe ListType
+ -> Maybe MinSize -> Maybe MaxSize
+ -> Maybe OrderBy
+ -> Maybe Text -- full text search
+ -> m (VersionedWithCount NgramsTable)
+getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
+ getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
+ where
+ searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
+
+getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
+ => NodeId
+ -> TabType
+ -> ListId
+ -> m Version
+getTableNgramsVersion _nId _tabType _listId = currentVersion
+ -- TODO: limit?
+ -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
+ -- This line above looks like a waste of computation to finally get only the version.
+ -- See the comment about listNgramsChangedSince.
+
+
+-- | Text search is deactivated for now for ngrams by doc only
+getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
+ => DocId -> TabType
+ -> ListId -> Limit -> Maybe Offset
+ -> Maybe ListType
+ -> Maybe MinSize -> Maybe MaxSize
+ -> Maybe OrderBy
+ -> Maybe Text -- full text search
+ -> m (VersionedWithCount NgramsTable)
+getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
+ ns <- selectNodesWithUsername NodeList userMaster
+ let ngramsType = ngramsTypeFromTabType tabType
+ ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
+ let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
+ getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
+
+
+
+apiNgramsTableCorpus :: ( GargServerC env err m
+ )
+ => NodeId -> ServerT TableNgramsApi m
+apiNgramsTableCorpus cId = getTableNgramsCorpus cId
+ :<|> tableNgramsPut
+ :<|> scoresRecomputeTableNgrams cId
+ :<|> getTableNgramsVersion cId
+ :<|> apiNgramsAsync cId
+
+apiNgramsTableDoc :: ( GargServerC env err m
+ )
+ => DocId -> ServerT TableNgramsApi m
+apiNgramsTableDoc dId = getTableNgramsDoc dId
+ :<|> tableNgramsPut
+ :<|> scoresRecomputeTableNgrams dId
+ :<|> getTableNgramsVersion dId
+ :<|> apiNgramsAsync dId
+ -- > index all the corpus accordingly (TODO AD)
+
+apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
+apiNgramsAsync _dId =
+ serveJobsAPI $
+ JobFunction $ \i log ->
+ let
+ log' x = do
+ printDebug "tableNgramsPostChartsAsync" x
+ liftBase $ log x
+ in tableNgramsPostChartsAsync i log'
+
+-- Did the given list of ngrams changed since the given version?
+-- The returned value is versioned boolean value, meaning that one always retrieve the
+-- latest version.
+-- If the given version is negative then one simply receive the latest version and True.
+-- Using this function is more precise than simply comparing the latest version number
+-- with the local version number. Indeed there might be no change to this particular list
+-- and still the version number has changed because of other lists.
+--
+-- Here the added value is to make a compromise between precision, computation, and bandwidth:
+-- * currentVersion: good computation, good bandwidth, bad precision.
+-- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
+-- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
+listNgramsChangedSince :: RepoCmdM env err m
+ => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
+listNgramsChangedSince listId ngramsType version
+ | version < 0 =
+ Versioned <$> currentVersion <*> pure True
+ | otherwise =
+ tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
-tableNgramsPatch :: Connection -> CorpusId -> Maybe ListId -> NgramsIdPatchsFeed -> IO NgramsIdPatchsBack
-tableNgramsPatch = undefined
-{-
-tableNgramsPatch conn corpusId maybeList patchs = do
- listId <- case maybeList of
- Nothing -> defaultList conn corpusId
- Just listId' -> pure listId'
- _ <- ngramsGroup' conn Add $ toGroups listId _np_add_children patchs
- _ <- ngramsGroup' conn Del $ toGroups listId _np_rem_children patchs
- _ <- updateNodeNgrams conn (toLists listId patchs)
- pure (NgramsIdPatchs [])
--}
--- | TODO Errors management
--- TODO: polymorphic for Annuaire or Corpus or ...
-getTableNgrams :: Connection -> CorpusId -> Maybe TabType -> Maybe ListId -> IO NgramsTable
-getTableNgrams c cId maybeTabType maybeListId = do
- let lieu = "Garg.API.Ngrams: " :: Text
- let ngramsType = case maybeTabType of
- Nothing -> Ngrams.Sources -- panic (lieu <> "Indicate the Table")
- Just tab -> case tab of
- Sources -> Ngrams.Sources
- Authors -> Ngrams.Authors
- Institutes -> Ngrams.Institutes
- Terms -> Ngrams.NgramsTerms
- _ -> panic $ lieu <> "No Ngrams for this tab"
-
- listId <- case maybeListId of
- Nothing -> defaultList c cId
- Just lId -> pure lId
-
- (ngramsTableDatas, mapToParent, mapToChildren) <-
- Ngrams.getNgramsTableDb c NodeDocument ngramsType (Ngrams.NgramsTableParam listId cId)
-
- printDebug "ngramsTableDatas" ngramsTableDatas
-
- pure $ NgramsTable $ map (\(Ngrams.NgramsTableData ngs _ lt w) ->
- NgramsElement ngs
- (maybe (panic $ lieu <> "listType") identity lt)
- (round w)
- (lookup ngs mapToParent)
- (maybe mempty identity $ lookup ngs mapToChildren)
- ) ngramsTableDatas