2 Module : Gargantext.Database.Metrics
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE QuasiQuotes #-}
15 module Gargantext.Database.Action.Metrics
18 import Database.PostgreSQL.Simple.SqlQQ (sql)
19 import Data.HashMap.Strict (HashMap)
20 import Data.Map.Strict (Map)
22 import Database.PostgreSQL.Simple (Query, Only(..))
23 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
24 import Data.Vector (Vector)
25 import Gargantext.Core (HasDBid(toDBid))
26 import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
27 import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
28 import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
29 import Gargantext.Core.Mail.Types (HasMail)
30 import Gargantext.Core.NodeStory hiding (runPGSQuery)
31 import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
32 import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
33 import Gargantext.Core.Types (ListType(..), NodeType(..), ContextId)
34 import Gargantext.Core.Types.Query (Limit(..))
35 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
36 import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
37 import Gargantext.Database.Admin.Config (userMaster)
38 import Gargantext.Database.Action.Metrics.NgramsByContext (refreshNgramsMaterialized)
39 import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
40 import Gargantext.Database.Query.Table.Node (defaultList)
41 import Gargantext.Database.Query.Table.Node.Select
42 import Gargantext.Prelude
43 import qualified Data.HashMap.Strict as HM
44 import qualified Data.Map.Strict as Map
45 import qualified Data.Set as Set
46 import qualified Data.List as List
47 import qualified Data.Text as Text
49 getMetrics :: FlowCmdM env err m
50 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
51 -> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
52 getMetrics cId maybeListId tabType maybeLimit = do
53 (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
55 pure (ngs, scored myCooc)
58 getNgramsCooc :: (FlowCmdM env err m)
59 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
60 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
61 , HashMap NgramsTerm (Maybe RootTerm)
62 , HashMap (NgramsTerm, NgramsTerm) Int
64 getNgramsCooc cId maybeListId tabType maybeLimit = do
66 lId <- case maybeListId of
67 Nothing -> defaultList cId
68 Just lId' -> pure lId'
70 (ngs', ngs) <- getNgrams lId tabType
72 lIds <- selectNodesWithUsername NodeList userMaster
74 myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
75 <$> groupNodesByNgrams ngs
76 <$> getContextsByNgramsOnlyUser cId
78 (ngramsTypeFromTabType tabType)
79 (take' maybeLimit $ HM.keys ngs)
80 pure $ (ngs', ngs, myCooc)
82 ------------------------------------------------------------------------
83 ------------------------------------------------------------------------
84 updateNgramsOccurrences :: (FlowCmdM env err m)
85 => CorpusId -> Maybe ListId
87 updateNgramsOccurrences cId mlId = do
88 _ <- mapM (updateNgramsOccurrences' cId mlId Nothing) [Terms, Sources, Authors, Institutes]
92 updateNgramsOccurrences' :: (FlowCmdM env err m)
93 => CorpusId -> Maybe ListId -> Maybe Limit -> TabType
95 updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
97 lId <- case maybeListId of
98 Nothing -> defaultList cId
99 Just lId' -> pure lId'
101 result <- getNgramsOccurrences cId lId tabType maybeLimit
104 toInsert :: [[Action]]
105 toInsert = map (\(ngramsTerm, score)
108 , toField $ unNgramsTerm ngramsTerm
109 , toField $ toDBid $ ngramsTypeFromTabType tabType
117 WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
118 INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
119 SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
120 JOIN ngrams on ngrams.terms = input.terms
121 ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
122 DO UPDATE SET weight = excluded.weight
126 let fields = map (\t-> QualifiedIdentifier Nothing t)
127 $ map Text.pack ["int4", "int4","text","int4","int4"]
129 res <- map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
131 -- _ <- map (\(Only a) -> a) <$> runPGSQuery [sql|refresh materialized view context_node_ngrams_view;|] ()
132 _ <- refreshNgramsMaterialized
137 ------------------------------------------------------------------------
138 -- Used for scores in Ngrams Table
139 getNgramsOccurrences :: (FlowCmdM env err m)
140 => CorpusId -> ListId -> TabType -> Maybe Limit
141 -> m (HashMap NgramsTerm Int)
142 getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
146 getNgramsContexts :: (FlowCmdM env err m)
147 => CorpusId -> ListId -> TabType -> Maybe Limit
148 -> m (HashMap NgramsTerm (Set ContextId))
149 getNgramsContexts cId lId tabType maybeLimit = do
150 (_ngs', ngs) <- getNgrams lId tabType
151 lIds <- selectNodesWithUsername NodeList userMaster
153 -- TODO maybe add an option to group here
154 getContextsByNgramsOnlyUser cId
156 (ngramsTypeFromTabType tabType)
157 (take' maybeLimit $ HM.keys ngs)
161 ------------------------------------------------------------------------
162 updateContextScore :: (FlowCmdM env err m)
163 => CorpusId -> Maybe ListId
165 updateContextScore cId maybeListId = do
167 lId <- case maybeListId of
168 Nothing -> defaultList cId
169 Just lId' -> pure lId'
171 result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
174 toInsert :: [[Action]]
175 toInsert = map (\(contextId, score)
185 WITH input(node_id, context_id, score) AS (?)
186 UPDATE nodes_contexts nc
187 SET score = input.score
189 WHERE nc.node_id = input.node_id
190 AND nc.context_id = input.context_id
194 let fields = map (\t-> QualifiedIdentifier Nothing t)
195 $ map Text.pack ["int4", "int4","int4"]
197 map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
202 -- Used for scores in Doc Table
203 getContextsNgramsScore :: (FlowCmdM env err m)
204 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
205 -> m (Map ContextId Int)
206 getContextsNgramsScore cId lId tabType listType maybeLimit
207 = Map.map Set.size <$> getContextsNgrams cId lId tabType listType maybeLimit
209 getContextsNgrams :: (FlowCmdM env err m)
210 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
211 -> m (Map ContextId (Set NgramsTerm))
212 getContextsNgrams cId lId tabType listType maybeLimit = do
213 (ngs', ngs) <- getNgrams lId tabType
214 lIds <- selectNodesWithUsername NodeList userMaster
216 result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
219 (ngramsTypeFromTabType tabType)
222 $ HM.filter (\v -> fst v == listType) ngs'
224 -- printDebug "getCoocByNgrams" result
225 pure $ Map.fromListWith (<>)
227 $ map (\(ng, contexts) -> List.zip (Set.toList contexts) (List.cycle [Set.singleton ng]))
231 ------------------------------------------------------------------------
232 ------------------------------------------------------------------------
235 getNgrams :: (HasMail env, HasNodeStory env err m)
237 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
238 , HashMap NgramsTerm (Maybe RootTerm)
240 getNgrams lId tabType = do
242 lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo [lId]
243 -- TODO filterListWithRoot [MapTerm, StopTerm, CandidateTerm] lists
244 let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
245 [[MapTerm], [StopTerm], [CandidateTerm]]
246 pure (lists, maybeSyn)
249 take' :: Maybe Limit -> [a] -> [a]
250 take' Nothing xs = xs
251 take' (Just n) xs = take (getLimit n) xs