2 Module : Gargantext.Database.Metrics
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE QuasiQuotes #-}
15 module Gargantext.Database.Action.Metrics
18 import Database.PostgreSQL.Simple.SqlQQ (sql)
19 import Data.HashMap.Strict (HashMap)
22 import Database.PostgreSQL.Simple (Query, Only(..))
23 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
24 import Data.Vector (Vector)
25 import Gargantext.Core (HasDBid(toDBid))
26 import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo')
27 import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
28 import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
29 import Gargantext.Core.Mail.Types (HasMail)
30 import Gargantext.Core.NodeStory
31 import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
32 import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
33 import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
34 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
35 import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
36 import Gargantext.Database.Admin.Config (userMaster)
37 import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
38 import Gargantext.Database.Query.Table.Node (defaultList)
39 import Gargantext.Database.Query.Table.Node.Select
40 import Gargantext.Prelude
41 import qualified Data.HashMap.Strict as HM
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.List as List
45 import qualified Data.Text as Text
47 getMetrics :: FlowCmdM env err m
48 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
49 -> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
50 getMetrics cId maybeListId tabType maybeLimit = do
51 (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
53 pure (ngs, scored myCooc)
56 getNgramsCooc :: (FlowCmdM env err m)
57 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
58 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
59 , HashMap NgramsTerm (Maybe RootTerm)
60 , HashMap (NgramsTerm, NgramsTerm) Int
62 getNgramsCooc cId maybeListId tabType maybeLimit = do
64 lId <- case maybeListId of
65 Nothing -> defaultList cId
66 Just lId' -> pure lId'
68 (ngs', ngs) <- getNgrams lId tabType
70 lIds <- selectNodesWithUsername NodeList userMaster
72 myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
73 <$> groupNodesByNgrams ngs
74 <$> getContextsByNgramsOnlyUser cId
76 (ngramsTypeFromTabType tabType)
77 (take' maybeLimit $ HM.keys ngs)
78 pure $ (ngs', ngs, myCooc)
80 ------------------------------------------------------------------------
81 ------------------------------------------------------------------------
82 updateNgramsOccurrences :: (FlowCmdM env err m)
83 => CorpusId -> Maybe ListId
85 updateNgramsOccurrences cId mlId = do
86 _ <- mapM (updateNgramsOccurrences' cId mlId Nothing) [Terms, Sources, Authors, Institutes]
90 updateNgramsOccurrences' :: (FlowCmdM env err m)
91 => CorpusId -> Maybe ListId -> Maybe Limit -> TabType
93 updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
95 lId <- case maybeListId of
96 Nothing -> defaultList cId
97 Just lId' -> pure lId'
99 result <- getNgramsOccurrences cId lId tabType maybeLimit
102 toInsert :: [[Action]]
103 toInsert = map (\(ngramsTerm, score)
106 , toField $ unNgramsTerm ngramsTerm
107 , toField $ toDBid $ ngramsTypeFromTabType tabType
115 WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
116 INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
117 SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
118 JOIN ngrams on ngrams.terms = input.terms
119 ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
120 DO UPDATE SET weight = excluded.weight
124 let fields = map (\t-> QualifiedIdentifier Nothing t)
125 $ map Text.pack ["int4", "int4","text","int4","int4"]
127 map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
131 ------------------------------------------------------------------------
132 -- Used for scores in Ngrams Table
133 getNgramsOccurrences :: (FlowCmdM env err m)
134 => CorpusId -> ListId -> TabType -> Maybe Limit
135 -> m (HashMap NgramsTerm Int)
136 getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
140 getNgramsContexts :: (FlowCmdM env err m)
141 => CorpusId -> ListId -> TabType -> Maybe Limit
142 -> m (HashMap NgramsTerm (Set ContextId))
143 getNgramsContexts cId lId tabType maybeLimit = do
144 (_ngs', ngs) <- getNgrams lId tabType
145 lIds <- selectNodesWithUsername NodeList userMaster
147 -- TODO maybe add an option to group here
148 getContextsByNgramsOnlyUser cId
150 (ngramsTypeFromTabType tabType)
151 (take' maybeLimit $ HM.keys ngs)
155 ------------------------------------------------------------------------
156 updateContextScore :: (FlowCmdM env err m)
157 => CorpusId -> Maybe ListId
159 updateContextScore cId maybeListId = do
161 lId <- case maybeListId of
162 Nothing -> defaultList cId
163 Just lId' -> pure lId'
165 result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
168 toInsert :: [[Action]]
169 toInsert = map (\(contextId, score)
179 WITH input(node_id, context_id, score) AS (?)
180 UPDATE nodes_contexts nc
181 SET score = input.score
183 WHERE nc.node_id = input.node_id
184 AND nc.context_id = input.context_id
188 let fields = map (\t-> QualifiedIdentifier Nothing t)
189 $ map Text.pack ["int4", "int4","int4"]
191 map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
196 -- Used for scores in Doc Table
197 getContextsNgramsScore :: (FlowCmdM env err m)
198 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
199 -> m (Map ContextId Int)
200 getContextsNgramsScore cId lId tabType listType maybeLimit
201 = Map.map Set.size <$> getContextsNgrams cId lId tabType listType maybeLimit
203 getContextsNgrams :: (FlowCmdM env err m)
204 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
205 -> m (Map ContextId (Set NgramsTerm))
206 getContextsNgrams cId lId tabType listType maybeLimit = do
207 (ngs', ngs) <- getNgrams lId tabType
208 lIds <- selectNodesWithUsername NodeList userMaster
210 result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
213 (ngramsTypeFromTabType tabType)
216 $ HM.filter (\v -> fst v == listType) ngs'
218 -- printDebug "getCoocByNgrams" result
219 pure $ Map.fromListWith (<>)
221 $ map (\(ng, contexts) -> List.zip (Set.toList contexts) (List.cycle [Set.singleton ng]))
225 ------------------------------------------------------------------------
226 ------------------------------------------------------------------------
229 getNgrams :: (HasMail env, HasNodeStory env err m)
231 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
232 , HashMap NgramsTerm (Maybe RootTerm)
234 getNgrams lId tabType = do
236 lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo' [lId]
237 -- TODO filterListWithRoot [MapTerm, StopTerm, CandidateTerm] lists
238 let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
239 [[MapTerm], [StopTerm], [CandidateTerm]]
240 pure (lists, maybeSyn)
243 take' :: Maybe Int -> [a] -> [a]
244 take' Nothing xs = xs
245 take' (Just n) xs = take n xs