]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Metrics.hs
[VERSION] +1 to 0.0.6.9.9.4.3
[gargantext.git] / src / Gargantext / Database / Action / Metrics.hs
1 {-|
2 Module : Gargantext.Database.Metrics
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Node API
11 -}
12
13 {-# LANGUAGE QuasiQuotes #-}
14
15 module Gargantext.Database.Action.Metrics
16 where
17
18 import Database.PostgreSQL.Simple.SqlQQ (sql)
19 import Data.HashMap.Strict (HashMap)
20 import Data.Map.Strict (Map)
21 import Data.Set (Set)
22 import Database.PostgreSQL.Simple (Query, Only(..))
23 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
24 import Data.Vector (Vector)
25 import Gargantext.Core (HasDBid(toDBid))
26 import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
27 import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
28 import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
29 import Gargantext.Core.Mail.Types (HasMail)
30 import Gargantext.Core.NodeStory hiding (runPGSQuery)
31 import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
32 import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
33 import Gargantext.Core.Types (ListType(..), NodeType(..), ContextId)
34 import Gargantext.Core.Types.Query (Limit(..))
35 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
36 import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
37 import Gargantext.Database.Admin.Config (userMaster)
38 import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
39 import Gargantext.Database.Query.Table.Node (defaultList)
40 import Gargantext.Database.Query.Table.Node.Select
41 import Gargantext.Prelude
42 import qualified Data.HashMap.Strict as HM
43 import qualified Data.Map.Strict as Map
44 import qualified Data.Set as Set
45 import qualified Data.List as List
46 import qualified Data.Text as Text
47
48 getMetrics :: FlowCmdM env err m
49 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
50 -> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
51 getMetrics cId maybeListId tabType maybeLimit = do
52 (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
53 -- TODO HashMap
54 pure (ngs, scored myCooc)
55
56
57 getNgramsCooc :: (FlowCmdM env err m)
58 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
59 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
60 , HashMap NgramsTerm (Maybe RootTerm)
61 , HashMap (NgramsTerm, NgramsTerm) Int
62 )
63 getNgramsCooc cId maybeListId tabType maybeLimit = do
64
65 lId <- case maybeListId of
66 Nothing -> defaultList cId
67 Just lId' -> pure lId'
68
69 (ngs', ngs) <- getNgrams lId tabType
70
71 lIds <- selectNodesWithUsername NodeList userMaster
72
73 myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
74 <$> groupNodesByNgrams ngs
75 <$> getContextsByNgramsOnlyUser cId
76 (lIds <> [lId])
77 (ngramsTypeFromTabType tabType)
78 (take' maybeLimit $ HM.keys ngs)
79 pure $ (ngs', ngs, myCooc)
80
81 ------------------------------------------------------------------------
82 ------------------------------------------------------------------------
83 updateNgramsOccurrences :: (FlowCmdM env err m)
84 => CorpusId -> Maybe ListId
85 -> m ()
86 updateNgramsOccurrences cId mlId = do
87 _ <- mapM (updateNgramsOccurrences' cId mlId Nothing) [Terms, Sources, Authors, Institutes]
88 pure ()
89
90
91 updateNgramsOccurrences' :: (FlowCmdM env err m)
92 => CorpusId -> Maybe ListId -> Maybe Limit -> TabType
93 -> m [Int]
94 updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
95
96 lId <- case maybeListId of
97 Nothing -> defaultList cId
98 Just lId' -> pure lId'
99
100 result <- getNgramsOccurrences cId lId tabType maybeLimit
101
102 let
103 toInsert :: [[Action]]
104 toInsert = map (\(ngramsTerm, score)
105 -> [ toField cId
106 , toField lId
107 , toField $ unNgramsTerm ngramsTerm
108 , toField $ toDBid $ ngramsTypeFromTabType tabType
109 , toField score
110 ]
111 )
112 $ HM.toList result
113
114 queryInsert :: Query
115 queryInsert = [sql|
116 WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
117 INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
118 SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
119 JOIN ngrams on ngrams.terms = input.terms
120 ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
121 DO UPDATE SET weight = excluded.weight
122 RETURNING 1
123 |]
124
125 let fields = map (\t-> QualifiedIdentifier Nothing t)
126 $ map Text.pack ["int4", "int4","text","int4","int4"]
127
128 map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
129
130
131
132 ------------------------------------------------------------------------
133 -- Used for scores in Ngrams Table
134 getNgramsOccurrences :: (FlowCmdM env err m)
135 => CorpusId -> ListId -> TabType -> Maybe Limit
136 -> m (HashMap NgramsTerm Int)
137 getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
138
139
140
141 getNgramsContexts :: (FlowCmdM env err m)
142 => CorpusId -> ListId -> TabType -> Maybe Limit
143 -> m (HashMap NgramsTerm (Set ContextId))
144 getNgramsContexts cId lId tabType maybeLimit = do
145 (_ngs', ngs) <- getNgrams lId tabType
146 lIds <- selectNodesWithUsername NodeList userMaster
147
148 -- TODO maybe add an option to group here
149 getContextsByNgramsOnlyUser cId
150 (lIds <> [lId])
151 (ngramsTypeFromTabType tabType)
152 (take' maybeLimit $ HM.keys ngs)
153
154
155
156 ------------------------------------------------------------------------
157 updateContextScore :: (FlowCmdM env err m)
158 => CorpusId -> Maybe ListId
159 -> m [Int]
160 updateContextScore cId maybeListId = do
161
162 lId <- case maybeListId of
163 Nothing -> defaultList cId
164 Just lId' -> pure lId'
165
166 result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
167
168 let
169 toInsert :: [[Action]]
170 toInsert = map (\(contextId, score)
171 -> [ toField cId
172 , toField contextId
173 , toField score
174 ]
175 )
176 $ Map.toList result
177
178 queryInsert :: Query
179 queryInsert = [sql|
180 WITH input(node_id, context_id, score) AS (?)
181 UPDATE nodes_contexts nc
182 SET score = input.score
183 FROM input
184 WHERE nc.node_id = input.node_id
185 AND nc.context_id = input.context_id
186 RETURNING 1
187 |]
188
189 let fields = map (\t-> QualifiedIdentifier Nothing t)
190 $ map Text.pack ["int4", "int4","int4"]
191
192 map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
193
194
195
196
197 -- Used for scores in Doc Table
198 getContextsNgramsScore :: (FlowCmdM env err m)
199 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
200 -> m (Map ContextId Int)
201 getContextsNgramsScore cId lId tabType listType maybeLimit
202 = Map.map Set.size <$> getContextsNgrams cId lId tabType listType maybeLimit
203
204 getContextsNgrams :: (FlowCmdM env err m)
205 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
206 -> m (Map ContextId (Set NgramsTerm))
207 getContextsNgrams cId lId tabType listType maybeLimit = do
208 (ngs', ngs) <- getNgrams lId tabType
209 lIds <- selectNodesWithUsername NodeList userMaster
210
211 result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
212 cId
213 (lIds <> [lId])
214 (ngramsTypeFromTabType tabType)
215 ( take' maybeLimit
216 $ HM.keys
217 $ HM.filter (\v -> fst v == listType) ngs'
218 )
219 -- printDebug "getCoocByNgrams" result
220 pure $ Map.fromListWith (<>)
221 $ List.concat
222 $ map (\(ng, contexts) -> List.zip (Set.toList contexts) (List.cycle [Set.singleton ng]))
223 $ HM.toList result
224
225
226 ------------------------------------------------------------------------
227 ------------------------------------------------------------------------
228
229
230 getNgrams :: (HasMail env, HasNodeStory env err m)
231 => ListId -> TabType
232 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
233 , HashMap NgramsTerm (Maybe RootTerm)
234 )
235 getNgrams lId tabType = do
236
237 lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo [lId]
238 -- TODO filterListWithRoot [MapTerm, StopTerm, CandidateTerm] lists
239 let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
240 [[MapTerm], [StopTerm], [CandidateTerm]]
241 pure (lists, maybeSyn)
242
243 -- Some useful Tools
244 take' :: Maybe Limit -> [a] -> [a]
245 take' Nothing xs = xs
246 take' (Just n) xs = take (getLimit n) xs