]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Metrics.hs
[openalex] use search instead of filter
[gargantext.git] / src / Gargantext / Database / Action / Metrics.hs
1 {-|
2 Module : Gargantext.Database.Metrics
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Node API
11 -}
12
13 {-# LANGUAGE QuasiQuotes #-}
14
15 module Gargantext.Database.Action.Metrics
16 where
17
18 import Database.PostgreSQL.Simple.SqlQQ (sql)
19 import Data.HashMap.Strict (HashMap)
20 import Data.Map.Strict (Map)
21 import Data.Set (Set)
22 import Database.PostgreSQL.Simple (Query, Only(..))
23 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
24 import Data.Vector (Vector)
25 import Gargantext.Core (HasDBid(toDBid))
26 import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
27 import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
28 import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
29 import Gargantext.Core.Mail.Types (HasMail)
30 import Gargantext.Core.NodeStory hiding (runPGSQuery)
31 import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
32 import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
33 import Gargantext.Core.Types (ListType(..), NodeType(..), ContextId)
34 import Gargantext.Core.Types.Query (Limit(..))
35 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
36 import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
37 import Gargantext.Database.Admin.Config (userMaster)
38 -- import Gargantext.Database.Action.Metrics.NgramsByContext (refreshNgramsMaterialized)
39 import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
40 import Gargantext.Database.Query.Table.Node (defaultList)
41 import Gargantext.Database.Query.Table.Node.Select
42 import Gargantext.Prelude
43 import qualified Data.HashMap.Strict as HM
44 import qualified Data.Map.Strict as Map
45 import qualified Data.Set as Set
46 import qualified Data.List as List
47 import qualified Data.Text as Text
48
49 getMetrics :: FlowCmdM env err m
50 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
51 -> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
52 getMetrics cId maybeListId tabType maybeLimit = do
53 (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
54 -- TODO HashMap
55 pure (ngs, scored myCooc)
56
57
58 getNgramsCooc :: (FlowCmdM env err m)
59 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
60 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
61 , HashMap NgramsTerm (Maybe RootTerm)
62 , HashMap (NgramsTerm, NgramsTerm) Int
63 )
64 getNgramsCooc cId maybeListId tabType maybeLimit = do
65
66 lId <- case maybeListId of
67 Nothing -> defaultList cId
68 Just lId' -> pure lId'
69
70 (ngs', ngs) <- getNgrams lId tabType
71
72 lIds <- selectNodesWithUsername NodeList userMaster
73
74 myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
75 <$> groupNodesByNgrams ngs
76 <$> getContextsByNgramsOnlyUser cId
77 (lIds <> [lId])
78 (ngramsTypeFromTabType tabType)
79 (take' maybeLimit $ HM.keys ngs)
80 pure $ (ngs', ngs, myCooc)
81
82 ------------------------------------------------------------------------
83 ------------------------------------------------------------------------
84 updateNgramsOccurrences :: (FlowCmdM env err m)
85 => CorpusId -> Maybe ListId
86 -> m ()
87 updateNgramsOccurrences cId mlId = do
88 _ <- mapM (updateNgramsOccurrences' cId mlId Nothing) [Terms, Sources, Authors, Institutes]
89 pure ()
90
91
92 updateNgramsOccurrences' :: (FlowCmdM env err m)
93 => CorpusId -> Maybe ListId -> Maybe Limit -> TabType
94 -> m [Int]
95 updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
96
97 lId <- case maybeListId of
98 Nothing -> defaultList cId
99 Just lId' -> pure lId'
100
101 result <- getNgramsOccurrences cId lId tabType maybeLimit
102
103 let
104 toInsert :: [[Action]]
105 toInsert = map (\(ngramsTerm, score)
106 -> [ toField cId
107 , toField lId
108 , toField $ unNgramsTerm ngramsTerm
109 , toField $ toDBid $ ngramsTypeFromTabType tabType
110 , toField score
111 ]
112 )
113 $ HM.toList result
114
115 queryInsert :: Query
116 queryInsert = [sql|
117 WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
118 INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
119 SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
120 JOIN ngrams on ngrams.terms = input.terms
121 ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
122 DO UPDATE SET weight = excluded.weight
123 RETURNING 1
124 |]
125
126 let fields = map (\t-> QualifiedIdentifier Nothing t)
127 $ map Text.pack ["int4", "int4","text","int4","int4"]
128
129 res <- map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
130
131 -- _ <- map (\(Only a) -> a) <$> runPGSQuery [sql|refresh materialized view context_node_ngrams_view;|] ()
132 -- _ <- refreshNgramsMaterialized
133 pure res
134
135
136
137 ------------------------------------------------------------------------
138 -- Used for scores in Ngrams Table
139 getNgramsOccurrences :: (FlowCmdM env err m)
140 => CorpusId -> ListId -> TabType -> Maybe Limit
141 -> m (HashMap NgramsTerm Int)
142 getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
143
144
145
146 getNgramsContexts :: (FlowCmdM env err m)
147 => CorpusId -> ListId -> TabType -> Maybe Limit
148 -> m (HashMap NgramsTerm (Set ContextId))
149 getNgramsContexts cId lId tabType maybeLimit = do
150 (_ngs', ngs) <- getNgrams lId tabType
151 lIds <- selectNodesWithUsername NodeList userMaster
152
153 -- TODO maybe add an option to group here
154 getContextsByNgramsOnlyUser cId
155 (lIds <> [lId])
156 (ngramsTypeFromTabType tabType)
157 (take' maybeLimit $ HM.keys ngs)
158
159
160
161 ------------------------------------------------------------------------
162 updateContextScore :: (FlowCmdM env err m)
163 => CorpusId -> Maybe ListId
164 -> m [Int]
165 updateContextScore cId maybeListId = do
166
167 lId <- case maybeListId of
168 Nothing -> defaultList cId
169 Just lId' -> pure lId'
170
171 result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
172
173 let
174 toInsert :: [[Action]]
175 toInsert = map (\(contextId, score)
176 -> [ toField cId
177 , toField contextId
178 , toField score
179 ]
180 )
181 $ Map.toList result
182
183 queryInsert :: Query
184 queryInsert = [sql|
185 WITH input(node_id, context_id, score) AS (?)
186 UPDATE nodes_contexts nc
187 SET score = input.score
188 FROM input
189 WHERE nc.node_id = input.node_id
190 AND nc.context_id = input.context_id
191 RETURNING 1
192 |]
193
194 let fields = map (\t-> QualifiedIdentifier Nothing t)
195 $ map Text.pack ["int4", "int4","int4"]
196
197 map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
198
199
200
201
202 -- Used for scores in Doc Table
203 getContextsNgramsScore :: (FlowCmdM env err m)
204 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
205 -> m (Map ContextId Int)
206 getContextsNgramsScore cId lId tabType listType maybeLimit
207 = Map.map Set.size <$> getContextsNgrams cId lId tabType listType maybeLimit
208
209 getContextsNgrams :: (FlowCmdM env err m)
210 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
211 -> m (Map ContextId (Set NgramsTerm))
212 getContextsNgrams cId lId tabType listType maybeLimit = do
213 (ngs', ngs) <- getNgrams lId tabType
214 lIds <- selectNodesWithUsername NodeList userMaster
215
216 result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
217 cId
218 (lIds <> [lId])
219 (ngramsTypeFromTabType tabType)
220 ( take' maybeLimit
221 $ HM.keys
222 $ HM.filter (\v -> fst v == listType) ngs'
223 )
224 -- printDebug "getCoocByNgrams" result
225 pure $ Map.fromListWith (<>)
226 $ List.concat
227 $ map (\(ng, contexts) -> List.zip (Set.toList contexts) (List.cycle [Set.singleton ng]))
228 $ HM.toList result
229
230
231 ------------------------------------------------------------------------
232 ------------------------------------------------------------------------
233
234
235 getNgrams :: (HasMail env, HasNodeStory env err m)
236 => ListId -> TabType
237 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
238 , HashMap NgramsTerm (Maybe RootTerm)
239 )
240 getNgrams lId tabType = do
241
242 lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo [lId]
243 -- TODO filterListWithRoot [MapTerm, StopTerm, CandidateTerm] lists
244 let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
245 [[MapTerm], [StopTerm], [CandidateTerm]]
246 pure (lists, maybeSyn)
247
248 -- Some useful Tools
249 take' :: Maybe Limit -> [a] -> [a]
250 take' Nothing xs = xs
251 take' (Just n) xs = take (getLimit n) xs