]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Query/Table/NodeContext.hs
[ngrams] fix score ordering
[gargantext.git] / src / Gargantext / Database / Query / Table / NodeContext.hs
1 {-|
2 Module : Gargantext.Database.Query.Table.NodeContext
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
12 -}
13
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
15
16 {-# LANGUAGE Arrows #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE LambdaCase #-}
19 {-# LANGUAGE QuasiQuotes #-}
20 {-# LANGUAGE TemplateHaskell #-}
21
22 module Gargantext.Database.Query.Table.NodeContext
23 ( module Gargantext.Database.Schema.NodeContext
24 , queryNodeContextTable
25 , selectDocsDates
26 , selectDocNodes
27 , selectDocs
28 , nodeContextsCategory
29 , nodeContextsScore
30 , getNodeContexts
31 , getNodeContext
32 , updateNodeContextCategory
33 , getContextsForNgrams
34 , ContextForNgrams(..)
35 , getContextsForNgramsTerms
36 , ContextForNgramsTerms(..)
37 , insertNodeContext
38 , deleteNodeContext
39 , selectPublicContexts
40 , selectCountDocs
41 )
42 where
43
44 import Control.Arrow (returnA)
45 import Control.Lens (view, (^.))
46 import Data.Maybe (catMaybes)
47 import Data.Time (UTCTime)
48 import Data.Text (Text, splitOn)
49 import Database.PostgreSQL.Simple.SqlQQ (sql)
50 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
51 import Opaleye
52 import qualified Database.PostgreSQL.Simple as PGS (In(..), Query, Only(..))
53 import qualified Opaleye as O
54
55 import Gargantext.Core
56 import Gargantext.Core.Types
57 -- import Gargantext.Core.Types.Search (HyperdataRow(..), toHyperdataRow)
58 import Gargantext.Database.Admin.Types.Hyperdata
59 import Gargantext.Database.Query.Table.Node.Error (HasNodeError, NodeError(DoesNotExist), nodeError)
60 import Gargantext.Database.Prelude
61 import Gargantext.Prelude.Crypto.Hash (Hash)
62 import Gargantext.Database.Schema.Context
63 import Gargantext.Database.Schema.Node
64 import Gargantext.Database.Schema.NodeContext
65 import Gargantext.Prelude
66
67 queryNodeContextTable :: Select NodeContextRead
68 queryNodeContextTable = selectTable nodeContextTable
69
70 -- | not optimized (get all ngrams without filters)
71 _nodesContexts :: Cmd err [NodeContext]
72 _nodesContexts = runOpaQuery queryNodeContextTable
73
74 ------------------------------------------------------------------------
75 -- | Basic NodeContext tools
76 getNodeContexts :: NodeId -> Cmd err [NodeContext]
77 getNodeContexts n = runOpaQuery (selectNodeContexts $ pgNodeId n)
78 where
79 selectNodeContexts :: Field SqlInt4 -> Select NodeContextRead
80 selectNodeContexts n' = proc () -> do
81 ns <- queryNodeContextTable -< ()
82 restrict -< _nc_node_id ns .== n'
83 returnA -< ns
84
85
86 getNodeContext :: HasNodeError err => ContextId -> NodeId -> Cmd err NodeContext
87 getNodeContext c n = do
88 maybeNodeContext <- headMay <$> runOpaQuery (selectNodeContext (pgNodeId c) (pgNodeId n))
89 case maybeNodeContext of
90 Nothing -> nodeError (DoesNotExist c)
91 Just r -> pure r
92 where
93 selectNodeContext :: Field SqlInt4 -> Field SqlInt4 -> Select NodeContextRead
94 selectNodeContext c' n' = proc () -> do
95 ns <- queryNodeContextTable -< ()
96 restrict -< _nc_context_id ns .== c'
97 restrict -< _nc_node_id ns .== n'
98 returnA -< ns
99
100 updateNodeContextCategory :: ContextId -> NodeId -> Int -> Cmd err Int64
101 updateNodeContextCategory cId nId cat = do
102 execPGSQuery upScore (cat, cId, nId)
103 where
104 upScore :: PGS.Query
105 upScore = [sql| UPDATE nodes_contexts
106 SET category = ?
107 WHERE context_id = ?
108 AND node_id = ? |]
109
110 data ContextForNgrams =
111 ContextForNgrams { _cfn_nodeId :: NodeId
112 , _cfn_hash :: Maybe Hash
113 , _cfn_userId :: UserId
114 , _cfn_parentId :: Maybe ParentId
115 , _cfn_c_title :: ContextTitle
116 , _cfn_date :: UTCTime
117 , _cfn_hyperdata :: HyperdataDocument }
118 getContextsForNgrams :: HasNodeError err
119 => NodeId
120 -> [Int]
121 -> Cmd err [ContextForNgrams]
122 getContextsForNgrams cId ngramsIds = do
123 res <- runPGSQuery query (cId, PGS.In ngramsIds)
124 pure $ (\( _cfn_nodeId
125 , _cfn_hash
126 , _cfn_userId
127 , _cfn_parentId
128 , _cfn_c_title
129 , _cfn_date
130 , _cfn_hyperdata) -> ContextForNgrams { .. }) <$> res
131 where
132 query :: PGS.Query
133 query = [sql| SELECT contexts.id, hash_id, typename, user_id, parent_id, name, date, hyperdata
134 FROM contexts
135 JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id
136 JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id
137 WHERE nodes_contexts.node_id = ?
138 AND context_node_ngrams.ngrams_id IN ? |]
139
140 data ContextForNgramsTerms =
141 ContextForNgramsTerms { _cfnt_nodeId :: NodeId
142 , _cfnt_hash :: Maybe Hash
143 , _cfnt_nodeTypeId :: NodeTypeId
144 , _cfnt_userId :: UserId
145 , _cfnt_parentId :: Maybe ParentId
146 , _cfnt_c_title :: ContextTitle
147 , _cfnt_date :: UTCTime
148 , _cfnt_hyperdata :: HyperdataDocument
149 , _cfnt_score :: Maybe Double
150 , _cfnt_category :: Maybe Int }
151 getContextsForNgramsTerms :: HasNodeError err
152 => NodeId
153 -> [Text]
154 -> Cmd err [ContextForNgramsTerms]
155 getContextsForNgramsTerms cId ngramsTerms = do
156 res <- runPGSQuery query (cId, PGS.In ngramsTerms)
157 pure $ (\( _cfnt_nodeId
158 , _cfnt_hash
159 , _cfnt_nodeTypeId
160 , _cfnt_userId
161 , _cfnt_parentId
162 , _cfnt_c_title
163 , _cfnt_date
164 , _cfnt_hyperdata
165 , _cfnt_score
166 , _cfnt_category) -> ContextForNgramsTerms { .. }) <$> res
167 where
168 query :: PGS.Query
169 query = [sql| SELECT t.id, t.hash_id, t.typename, t.user_id, t.parent_id, t.name, t.date, t.hyperdata, t.score, t.category
170 FROM (
171 SELECT DISTINCT ON (contexts.id)
172 contexts.id AS id,
173 hash_id,
174 typename,
175 user_id,
176 parent_id,
177 name,
178 date,
179 hyperdata,
180 nodes_contexts.score AS score,
181 nodes_contexts.category AS category,
182 context_node_ngrams.doc_count AS doc_count
183 FROM contexts
184 JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id
185 JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id
186 JOIN ngrams ON context_node_ngrams.ngrams_id = ngrams.id
187 WHERE nodes_contexts.node_id = ?
188 AND ngrams.terms IN ?) t
189 ORDER BY t.doc_count DESC |]
190
191 ------------------------------------------------------------------------
192 insertNodeContext :: [NodeContext] -> Cmd err Int
193 insertNodeContext ns = mkCmd $ \conn -> fromIntegral <$> (runInsert_ conn
194 $ Insert nodeContextTable ns' rCount (Just DoNothing))
195 where
196 ns' :: [NodeContextWrite]
197 ns' = map (\(NodeContext i n c x y)
198 -> NodeContext (sqlInt4 <$> i)
199 (pgNodeId n)
200 (pgNodeId c)
201 (sqlDouble <$> x)
202 (sqlInt4 <$> y)
203 ) ns
204
205
206 ------------------------------------------------------------------------
207 type Node_Id = NodeId
208 type Context_Id = NodeId
209
210 deleteNodeContext :: Node_Id -> Context_Id -> Cmd err Int
211 deleteNodeContext n c = mkCmd $ \conn ->
212 fromIntegral <$> runDelete_ conn
213 (Delete nodeContextTable
214 (\(NodeContext _ n_id c_id _ _) -> n_id .== pgNodeId n
215 .&& c_id .== pgNodeId c
216 )
217 rCount
218 )
219
220 ------------------------------------------------------------------------
221 -- | Favorite management
222 nodeContextsCategory :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
223 nodeContextsCategory inputData = map (\(PGS.Only a) -> a)
224 <$> runPGSQuery catSelect (PGS.Only $ Values fields inputData)
225 where
226 fields = map (QualifiedIdentifier Nothing) ["int4","int4","int4"]
227 catSelect :: PGS.Query
228 catSelect = [sql| UPDATE nodes_contexts as nn0
229 SET category = nn1.category
230 FROM (?) as nn1(node_id,context_id,category)
231 WHERE nn0.node_id = nn1.node_id
232 AND nn0.context_id = nn1.context_id
233 RETURNING nn1.node_id
234 |]
235
236 ------------------------------------------------------------------------
237 -- | Score management
238 nodeContextsScore :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
239 nodeContextsScore inputData = map (\(PGS.Only a) -> a)
240 <$> runPGSQuery catScore (PGS.Only $ Values fields inputData)
241 where
242 fields = map (QualifiedIdentifier Nothing) ["int4","int4","int4"]
243 catScore :: PGS.Query
244 catScore = [sql| UPDATE nodes_contexts as nn0
245 SET score = nn1.score
246 FROM (?) as nn1(node_id, context_id, score)
247 WHERE nn0.node_id = nn1.node_id
248 AND nn0.context_id = nn1.context_id
249 RETURNING nn1.context_id
250 |]
251
252
253 ------------------------------------------------------------------------
254 selectCountDocs :: HasDBid NodeType => CorpusId -> Cmd err Int
255 selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
256 where
257 queryCountDocs cId' = proc () -> do
258 (c, nc) <- joinInCorpus -< ()
259 restrict -< restrictMaybe nc $ \nc' -> (nc' ^. nc_node_id) .== pgNodeId cId' .&&
260 (nc' ^. nc_category) .>= sqlInt4 1
261 restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
262 returnA -< c
263
264
265 -- | TODO use UTCTime fast
266 selectDocsDates :: HasDBid NodeType => CorpusId -> Cmd err [Text]
267 selectDocsDates cId = map (head' "selectDocsDates" . splitOn "-")
268 <$> catMaybes
269 <$> map (view hd_publication_date)
270 <$> selectDocs cId
271
272 selectDocs :: HasDBid NodeType => CorpusId -> Cmd err [HyperdataDocument]
273 selectDocs cId = runOpaQuery (queryDocs cId)
274
275 queryDocs :: HasDBid NodeType => CorpusId -> O.Select (Field SqlJsonb)
276 queryDocs cId = proc () -> do
277 (c, nn) <- joinInCorpus -< ()
278 restrict -< restrictMaybe nn $ \nn' -> (nn' ^. nc_node_id) .== pgNodeId cId .&&
279 (nn' ^. nc_category) .>= sqlInt4 1
280 restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
281 returnA -< view (context_hyperdata) c
282
283 selectDocNodes :: HasDBid NodeType => CorpusId -> Cmd err [Context HyperdataDocument]
284 selectDocNodes cId = runOpaQuery (queryDocNodes cId)
285
286 queryDocNodes :: HasDBid NodeType => CorpusId -> O.Select ContextRead
287 queryDocNodes cId = proc () -> do
288 (c, nc) <- joinInCorpus -< ()
289 -- restrict -< restrictMaybe nc $ \nc' -> (nc' ^. nc_node_id) .== pgNodeId cId .&&
290 -- (nc' ^. nc_category) .>= sqlInt4 1
291 restrict -< matchMaybe nc $ \case
292 Nothing -> toFields True
293 Just nc' -> (nc' ^. nc_node_id) .== pgNodeId cId .&&
294 (nc' ^. nc_category) .>= sqlInt4 1
295 restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
296 returnA -< c
297
298 joinInCorpus :: O.Select (ContextRead, MaybeFields NodeContextRead)
299 joinInCorpus = proc () -> do
300 c <- queryContextTable -< ()
301 nc <- optionalRestrict queryNodeContextTable -<
302 (\nc' -> (c ^. context_id) .== (nc' ^. nc_context_id))
303 returnA -< (c, nc)
304
305
306 joinOn1 :: O.Select (NodeRead, MaybeFields NodeContextRead)
307 joinOn1 = proc () -> do
308 n <- queryNodeTable -< ()
309 nc <- optionalRestrict queryNodeContextTable -<
310 (\nc' -> (nc' ^. nc_node_id) .== (n ^. node_id))
311 returnA -< (n, nc)
312
313
314 ------------------------------------------------------------------------
315 selectPublicContexts :: HasDBid NodeType => (Hyperdata a, DefaultFromField SqlJsonb a)
316 => Cmd err [(Node a, Maybe Int)]
317 selectPublicContexts = runOpaQuery (queryWithType NodeFolderPublic)
318
319 queryWithType :: HasDBid NodeType => NodeType -> O.Select (NodeRead, MaybeFields (Field SqlInt4))
320 queryWithType nt = proc () -> do
321 (n, nc) <- joinOn1 -< ()
322 restrict -< (n ^. node_typename) .== sqlInt4 (toDBid nt)
323 returnA -< (n, view nc_context_id <$> nc)