Node API
-}
-
+{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Action.Metrics
where
+import Database.PostgreSQL.Simple.SqlQQ (sql)
+import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
-import Data.Text (Text)
-import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType)
-import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
-import Gargantext.Core.Types (ListType(..), Limit, NodeType(..))
-import Gargantext.Database.Action.Flow (FlowCmdM)
-import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser{-, getTficfWith-})
+import Data.Set (Set)
+import Database.PostgreSQL.Simple (Query, Only(..))
+import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
+import Data.Vector (Vector)
+import Gargantext.Core (HasDBid(toDBid))
+import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo')
+import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
+import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
+import Gargantext.Core.Mail.Types (HasMail)
+import Gargantext.Core.NodeStory
+import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
+import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
+import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
+import Gargantext.Database.Action.Flow.Types (FlowCmdM)
+import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Prelude
-import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
-import qualified Data.Map as Map
+import qualified Data.HashMap.Strict as HM
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.List as List
+import qualified Data.Text as Text
getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
- -> m (Map Text (ListType, Maybe Text), [Scored Text])
+ -> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
getMetrics cId maybeListId tabType maybeLimit = do
(ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
+ -- TODO HashMap
pure (ngs, scored myCooc)
getNgramsCooc :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
- -> m ( Map Text (ListType, Maybe Text)
- , Map Text (Maybe RootTerm)
- , Map (Text, Text) Int
+ -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
+ , HashMap NgramsTerm (Maybe RootTerm)
+ , HashMap (NgramsTerm, NgramsTerm) Int
)
getNgramsCooc cId maybeListId tabType maybeLimit = do
- (ngs', ngs) <- getNgrams cId maybeListId tabType
-
- let
- take' Nothing xs = xs
- take' (Just n) xs = take n xs
- lId <- defaultList cId
+ lId <- case maybeListId of
+ Nothing -> defaultList cId
+ Just lId' -> pure lId'
+
+ (ngs', ngs) <- getNgrams lId tabType
+
lIds <- selectNodesWithUsername NodeList userMaster
- myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal True)
- <$> groupNodesByNgrams ngs
- <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType)
- (take' maybeLimit $ Map.keys ngs)
+ myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
+ <$> groupNodesByNgrams ngs
+ <$> getContextsByNgramsOnlyUser cId
+ (lIds <> [lId])
+ (ngramsTypeFromTabType tabType)
+ (take' maybeLimit $ HM.keys ngs)
pure $ (ngs', ngs, myCooc)
+------------------------------------------------------------------------
+------------------------------------------------------------------------
+updateNgramsOccurrences :: (FlowCmdM env err m)
+ => CorpusId -> Maybe ListId
+ -> m ()
+updateNgramsOccurrences cId mlId = do
+ _ <- mapM (updateNgramsOccurrences' cId mlId Nothing) [Terms, Sources, Authors, Institutes]
+ pure ()
-getNgrams :: (FlowCmdM env err m)
- => CorpusId -> Maybe ListId -> TabType
- -> m (Map Text (ListType, Maybe Text), Map Text (Maybe RootTerm))
-getNgrams cId maybeListId tabType = do
+updateNgramsOccurrences' :: (FlowCmdM env err m)
+ => CorpusId -> Maybe ListId -> Maybe Limit -> TabType
+ -> m [Int]
+updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
- lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo
- let maybeSyn = Map.unions $ map (\t -> filterListWithRoot t lists)
- [MapTerm, StopTerm, CandidateTerm]
+ result <- getNgramsOccurrences cId lId tabType maybeLimit
+
+ let
+ toInsert :: [[Action]]
+ toInsert = map (\(ngramsTerm, score)
+ -> [ toField cId
+ , toField lId
+ , toField $ unNgramsTerm ngramsTerm
+ , toField $ toDBid $ ngramsTypeFromTabType tabType
+ , toField score
+ ]
+ )
+ $ HM.toList result
+
+ queryInsert :: Query
+ queryInsert = [sql|
+ WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
+ INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
+ SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
+ JOIN ngrams on ngrams.terms = input.terms
+ ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
+ DO UPDATE SET weight = excluded.weight
+ RETURNING 1
+ |]
+
+ let fields = map (\t-> QualifiedIdentifier Nothing t)
+ $ map Text.pack ["int4", "int4","text","int4","int4"]
+
+ map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
+
+
+
+------------------------------------------------------------------------
+-- Used for scores in Ngrams Table
+getNgramsOccurrences :: (FlowCmdM env err m)
+ => CorpusId -> ListId -> TabType -> Maybe Limit
+ -> m (HashMap NgramsTerm Int)
+getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
+
+
+
+getNgramsContexts :: (FlowCmdM env err m)
+ => CorpusId -> ListId -> TabType -> Maybe Limit
+ -> m (HashMap NgramsTerm (Set ContextId))
+getNgramsContexts cId lId tabType maybeLimit = do
+ (_ngs', ngs) <- getNgrams lId tabType
+ lIds <- selectNodesWithUsername NodeList userMaster
+
+ -- TODO maybe add an option to group here
+ getContextsByNgramsOnlyUser cId
+ (lIds <> [lId])
+ (ngramsTypeFromTabType tabType)
+ (take' maybeLimit $ HM.keys ngs)
+
+
+
+------------------------------------------------------------------------
+updateContextScore :: (FlowCmdM env err m)
+ => CorpusId -> Maybe ListId
+ -> m [Int]
+updateContextScore cId maybeListId = do
+
+ lId <- case maybeListId of
+ Nothing -> defaultList cId
+ Just lId' -> pure lId'
+
+ result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
+
+ let
+ toInsert :: [[Action]]
+ toInsert = map (\(contextId, score)
+ -> [ toField cId
+ , toField contextId
+ , toField score
+ ]
+ )
+ $ Map.toList result
+
+ queryInsert :: Query
+ queryInsert = [sql|
+ WITH input(node_id, context_id, score) AS (?)
+ UPDATE nodes_contexts nc
+ SET score = input.score
+ FROM input
+ WHERE nc.node_id = input.node_id
+ AND nc.context_id = input.context_id
+ RETURNING 1
+ |]
+
+ let fields = map (\t-> QualifiedIdentifier Nothing t)
+ $ map Text.pack ["int4", "int4","int4"]
+
+ map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
+
+
+
+
+-- Used for scores in Doc Table
+getContextsNgramsScore :: (FlowCmdM env err m)
+ => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
+ -> m (Map ContextId Int)
+getContextsNgramsScore cId lId tabType listType maybeLimit
+ = Map.map Set.size <$> getContextsNgrams cId lId tabType listType maybeLimit
+
+getContextsNgrams :: (FlowCmdM env err m)
+ => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
+ -> m (Map ContextId (Set NgramsTerm))
+getContextsNgrams cId lId tabType listType maybeLimit = do
+ (ngs', ngs) <- getNgrams lId tabType
+ lIds <- selectNodesWithUsername NodeList userMaster
+
+ result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
+ cId
+ (lIds <> [lId])
+ (ngramsTypeFromTabType tabType)
+ ( take' maybeLimit
+ $ HM.keys
+ $ HM.filter (\v -> fst v == listType) ngs'
+ )
+ -- printDebug "getCoocByNgrams" result
+ pure $ Map.fromListWith (<>)
+ $ List.concat
+ $ map (\(ng, contexts) -> List.zip (Set.toList contexts) (List.cycle [Set.singleton ng]))
+ $ HM.toList result
+
+
+------------------------------------------------------------------------
+------------------------------------------------------------------------
+
+
+getNgrams :: (HasMail env, HasNodeStory env err m)
+ => ListId -> TabType
+ -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
+ , HashMap NgramsTerm (Maybe RootTerm)
+ )
+getNgrams lId tabType = do
+
+ lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo' [lId]
+ let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
+ [MapTerm, StopTerm, CandidateTerm]
pure (lists, maybeSyn)
+-- Some useful Tools
+take' :: Maybe Int -> [a] -> [a]
+take' Nothing xs = xs
+take' (Just n) xs = take n xs
+
+
+