2 Module : Gargantext.Database.Metrics
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE QuasiQuotes #-}
15 module Gargantext.Database.Action.Metrics
18 import Database.PostgreSQL.Simple.SqlQQ (sql)
19 import Data.HashMap.Strict (HashMap)
20 import Data.Map.Strict (Map)
22 import Database.PostgreSQL.Simple (Query, Only(..))
23 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
24 import Data.Vector (Vector)
25 import Gargantext.Core (HasDBid(toDBid))
26 import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
27 import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
28 import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
29 import Gargantext.Core.Mail.Types (HasMail)
30 import Gargantext.Core.NodeStory hiding (runPGSQuery)
31 import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
32 import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
33 import Gargantext.Core.Types (ListType(..), NodeType(..), ContextId)
34 import Gargantext.Core.Types.Query (Limit(..))
35 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
36 import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
37 import Gargantext.Database.Admin.Config (userMaster)
38 import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
39 import Gargantext.Database.Query.Table.Node (defaultList)
40 import Gargantext.Database.Query.Table.Node.Select
41 import Gargantext.Prelude
42 import qualified Data.HashMap.Strict as HM
43 import qualified Data.Map.Strict as Map
44 import qualified Data.Set as Set
45 import qualified Data.List as List
46 import qualified Data.Text as Text
48 getMetrics :: FlowCmdM env err m
49 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
50 -> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
51 getMetrics cId maybeListId tabType maybeLimit = do
52 (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
54 pure (ngs, scored myCooc)
57 getNgramsCooc :: (FlowCmdM env err m)
58 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
59 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
60 , HashMap NgramsTerm (Maybe RootTerm)
61 , HashMap (NgramsTerm, NgramsTerm) Int
63 getNgramsCooc cId maybeListId tabType maybeLimit = do
65 lId <- case maybeListId of
66 Nothing -> defaultList cId
67 Just lId' -> pure lId'
69 (ngs', ngs) <- getNgrams lId tabType
71 lIds <- selectNodesWithUsername NodeList userMaster
73 myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
74 <$> groupNodesByNgrams ngs
75 <$> getContextsByNgramsOnlyUser cId
77 (ngramsTypeFromTabType tabType)
78 (take' maybeLimit $ HM.keys ngs)
79 pure $ (ngs', ngs, myCooc)
81 ------------------------------------------------------------------------
82 ------------------------------------------------------------------------
83 updateNgramsOccurrences :: (FlowCmdM env err m)
84 => CorpusId -> Maybe ListId
86 updateNgramsOccurrences cId mlId = do
87 _ <- mapM (updateNgramsOccurrences' cId mlId Nothing) [Terms, Sources, Authors, Institutes]
91 updateNgramsOccurrences' :: (FlowCmdM env err m)
92 => CorpusId -> Maybe ListId -> Maybe Limit -> TabType
94 updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
96 lId <- case maybeListId of
97 Nothing -> defaultList cId
98 Just lId' -> pure lId'
100 result <- getNgramsOccurrences cId lId tabType maybeLimit
103 toInsert :: [[Action]]
104 toInsert = map (\(ngramsTerm, score)
107 , toField $ unNgramsTerm ngramsTerm
108 , toField $ toDBid $ ngramsTypeFromTabType tabType
116 WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
117 INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
118 SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
119 JOIN ngrams on ngrams.terms = input.terms
120 ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
121 DO UPDATE SET weight = excluded.weight
125 let fields = map (\t-> QualifiedIdentifier Nothing t)
126 $ map Text.pack ["int4", "int4","text","int4","int4"]
128 map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
132 ------------------------------------------------------------------------
133 -- Used for scores in Ngrams Table
134 getNgramsOccurrences :: (FlowCmdM env err m)
135 => CorpusId -> ListId -> TabType -> Maybe Limit
136 -> m (HashMap NgramsTerm Int)
137 getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
141 getNgramsContexts :: (FlowCmdM env err m)
142 => CorpusId -> ListId -> TabType -> Maybe Limit
143 -> m (HashMap NgramsTerm (Set ContextId))
144 getNgramsContexts cId lId tabType maybeLimit = do
145 (_ngs', ngs) <- getNgrams lId tabType
146 lIds <- selectNodesWithUsername NodeList userMaster
148 -- TODO maybe add an option to group here
149 getContextsByNgramsOnlyUser cId
151 (ngramsTypeFromTabType tabType)
152 (take' maybeLimit $ HM.keys ngs)
156 ------------------------------------------------------------------------
157 updateContextScore :: (FlowCmdM env err m)
158 => CorpusId -> Maybe ListId
160 updateContextScore cId maybeListId = do
162 lId <- case maybeListId of
163 Nothing -> defaultList cId
164 Just lId' -> pure lId'
166 result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
169 toInsert :: [[Action]]
170 toInsert = map (\(contextId, score)
180 WITH input(node_id, context_id, score) AS (?)
181 UPDATE nodes_contexts nc
182 SET score = input.score
184 WHERE nc.node_id = input.node_id
185 AND nc.context_id = input.context_id
189 let fields = map (\t-> QualifiedIdentifier Nothing t)
190 $ map Text.pack ["int4", "int4","int4"]
192 map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
197 -- Used for scores in Doc Table
198 getContextsNgramsScore :: (FlowCmdM env err m)
199 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
200 -> m (Map ContextId Int)
201 getContextsNgramsScore cId lId tabType listType maybeLimit
202 = Map.map Set.size <$> getContextsNgrams cId lId tabType listType maybeLimit
204 getContextsNgrams :: (FlowCmdM env err m)
205 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
206 -> m (Map ContextId (Set NgramsTerm))
207 getContextsNgrams cId lId tabType listType maybeLimit = do
208 (ngs', ngs) <- getNgrams lId tabType
209 lIds <- selectNodesWithUsername NodeList userMaster
211 result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
214 (ngramsTypeFromTabType tabType)
217 $ HM.filter (\v -> fst v == listType) ngs'
219 -- printDebug "getCoocByNgrams" result
220 pure $ Map.fromListWith (<>)
222 $ map (\(ng, contexts) -> List.zip (Set.toList contexts) (List.cycle [Set.singleton ng]))
226 ------------------------------------------------------------------------
227 ------------------------------------------------------------------------
230 getNgrams :: (HasMail env, HasNodeStory env err m)
232 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
233 , HashMap NgramsTerm (Maybe RootTerm)
235 getNgrams lId tabType = do
237 lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo [lId]
238 -- TODO filterListWithRoot [MapTerm, StopTerm, CandidateTerm] lists
239 let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
240 [[MapTerm], [StopTerm], [CandidateTerm]]
241 pure (lists, maybeSyn)
244 take' :: Maybe Limit -> [a] -> [a]
245 take' Nothing xs = xs
246 take' (Just n) xs = take (getLimit n) xs