]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Metrics.hs
[nlp] add sample support for languages to corenlp
[gargantext.git] / src / Gargantext / Database / Action / Metrics.hs
1 {-|
2 Module : Gargantext.Database.Metrics
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Node API
11 -}
12
13 {-# LANGUAGE QuasiQuotes #-}
14
15 module Gargantext.Database.Action.Metrics
16 where
17
18 import Database.PostgreSQL.Simple.SqlQQ (sql)
19 import Data.HashMap.Strict (HashMap)
20 import Data.Map.Strict (Map)
21 import Data.Set (Set)
22 import Database.PostgreSQL.Simple (Query, Only(..))
23 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
24 import Data.Vector (Vector)
25 import Gargantext.Core (HasDBid(toDBid))
26 import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
27 import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
28 import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
29 import Gargantext.Core.Mail.Types (HasMail)
30 import Gargantext.Core.NodeStory hiding (runPGSQuery)
31 import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
32 import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
33 import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
34 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
35 import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
36 import Gargantext.Database.Admin.Config (userMaster)
37 import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
38 import Gargantext.Database.Query.Table.Node (defaultList)
39 import Gargantext.Database.Query.Table.Node.Select
40 import Gargantext.Prelude
41 import qualified Data.HashMap.Strict as HM
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.List as List
45 import qualified Data.Text as Text
46
47 getMetrics :: FlowCmdM env err m
48 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
49 -> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
50 getMetrics cId maybeListId tabType maybeLimit = do
51 (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
52 -- TODO HashMap
53 pure (ngs, scored myCooc)
54
55
56 getNgramsCooc :: (FlowCmdM env err m)
57 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
58 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
59 , HashMap NgramsTerm (Maybe RootTerm)
60 , HashMap (NgramsTerm, NgramsTerm) Int
61 )
62 getNgramsCooc cId maybeListId tabType maybeLimit = do
63
64 lId <- case maybeListId of
65 Nothing -> defaultList cId
66 Just lId' -> pure lId'
67
68 (ngs', ngs) <- getNgrams lId tabType
69
70 lIds <- selectNodesWithUsername NodeList userMaster
71
72 myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
73 <$> groupNodesByNgrams ngs
74 <$> getContextsByNgramsOnlyUser cId
75 (lIds <> [lId])
76 (ngramsTypeFromTabType tabType)
77 (take' maybeLimit $ HM.keys ngs)
78 pure $ (ngs', ngs, myCooc)
79
80 ------------------------------------------------------------------------
81 ------------------------------------------------------------------------
82 updateNgramsOccurrences :: (FlowCmdM env err m)
83 => CorpusId -> Maybe ListId
84 -> m ()
85 updateNgramsOccurrences cId mlId = do
86 _ <- mapM (updateNgramsOccurrences' cId mlId Nothing) [Terms, Sources, Authors, Institutes]
87 pure ()
88
89
90 updateNgramsOccurrences' :: (FlowCmdM env err m)
91 => CorpusId -> Maybe ListId -> Maybe Limit -> TabType
92 -> m [Int]
93 updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
94
95 lId <- case maybeListId of
96 Nothing -> defaultList cId
97 Just lId' -> pure lId'
98
99 result <- getNgramsOccurrences cId lId tabType maybeLimit
100
101 let
102 toInsert :: [[Action]]
103 toInsert = map (\(ngramsTerm, score)
104 -> [ toField cId
105 , toField lId
106 , toField $ unNgramsTerm ngramsTerm
107 , toField $ toDBid $ ngramsTypeFromTabType tabType
108 , toField score
109 ]
110 )
111 $ HM.toList result
112
113 queryInsert :: Query
114 queryInsert = [sql|
115 WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
116 INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
117 SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
118 JOIN ngrams on ngrams.terms = input.terms
119 ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
120 DO UPDATE SET weight = excluded.weight
121 RETURNING 1
122 |]
123
124 let fields = map (\t-> QualifiedIdentifier Nothing t)
125 $ map Text.pack ["int4", "int4","text","int4","int4"]
126
127 map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
128
129
130
131 ------------------------------------------------------------------------
132 -- Used for scores in Ngrams Table
133 getNgramsOccurrences :: (FlowCmdM env err m)
134 => CorpusId -> ListId -> TabType -> Maybe Limit
135 -> m (HashMap NgramsTerm Int)
136 getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
137
138
139
140 getNgramsContexts :: (FlowCmdM env err m)
141 => CorpusId -> ListId -> TabType -> Maybe Limit
142 -> m (HashMap NgramsTerm (Set ContextId))
143 getNgramsContexts cId lId tabType maybeLimit = do
144 (_ngs', ngs) <- getNgrams lId tabType
145 lIds <- selectNodesWithUsername NodeList userMaster
146
147 -- TODO maybe add an option to group here
148 getContextsByNgramsOnlyUser cId
149 (lIds <> [lId])
150 (ngramsTypeFromTabType tabType)
151 (take' maybeLimit $ HM.keys ngs)
152
153
154
155 ------------------------------------------------------------------------
156 updateContextScore :: (FlowCmdM env err m)
157 => CorpusId -> Maybe ListId
158 -> m [Int]
159 updateContextScore cId maybeListId = do
160
161 lId <- case maybeListId of
162 Nothing -> defaultList cId
163 Just lId' -> pure lId'
164
165 result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
166
167 let
168 toInsert :: [[Action]]
169 toInsert = map (\(contextId, score)
170 -> [ toField cId
171 , toField contextId
172 , toField score
173 ]
174 )
175 $ Map.toList result
176
177 queryInsert :: Query
178 queryInsert = [sql|
179 WITH input(node_id, context_id, score) AS (?)
180 UPDATE nodes_contexts nc
181 SET score = input.score
182 FROM input
183 WHERE nc.node_id = input.node_id
184 AND nc.context_id = input.context_id
185 RETURNING 1
186 |]
187
188 let fields = map (\t-> QualifiedIdentifier Nothing t)
189 $ map Text.pack ["int4", "int4","int4"]
190
191 map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
192
193
194
195
196 -- Used for scores in Doc Table
197 getContextsNgramsScore :: (FlowCmdM env err m)
198 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
199 -> m (Map ContextId Int)
200 getContextsNgramsScore cId lId tabType listType maybeLimit
201 = Map.map Set.size <$> getContextsNgrams cId lId tabType listType maybeLimit
202
203 getContextsNgrams :: (FlowCmdM env err m)
204 => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
205 -> m (Map ContextId (Set NgramsTerm))
206 getContextsNgrams cId lId tabType listType maybeLimit = do
207 (ngs', ngs) <- getNgrams lId tabType
208 lIds <- selectNodesWithUsername NodeList userMaster
209
210 result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
211 cId
212 (lIds <> [lId])
213 (ngramsTypeFromTabType tabType)
214 ( take' maybeLimit
215 $ HM.keys
216 $ HM.filter (\v -> fst v == listType) ngs'
217 )
218 -- printDebug "getCoocByNgrams" result
219 pure $ Map.fromListWith (<>)
220 $ List.concat
221 $ map (\(ng, contexts) -> List.zip (Set.toList contexts) (List.cycle [Set.singleton ng]))
222 $ HM.toList result
223
224
225 ------------------------------------------------------------------------
226 ------------------------------------------------------------------------
227
228
229 getNgrams :: (HasMail env, HasNodeStory env err m)
230 => ListId -> TabType
231 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
232 , HashMap NgramsTerm (Maybe RootTerm)
233 )
234 getNgrams lId tabType = do
235
236 lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo [lId]
237 -- TODO filterListWithRoot [MapTerm, StopTerm, CandidateTerm] lists
238 let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
239 [[MapTerm], [StopTerm], [CandidateTerm]]
240 pure (lists, maybeSyn)
241
242 -- Some useful Tools
243 take' :: Maybe Int -> [a] -> [a]
244 take' Nothing xs = xs
245 take' (Just n) xs = take n xs