]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Query/Table/NodeContext.hs
[openalex] first attempt on integration this
[gargantext.git] / src / Gargantext / Database / Query / Table / NodeContext.hs
1 {-|
2 Module : Gargantext.Database.Query.Table.NodeContext
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
12 -}
13
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
15
16 {-# LANGUAGE Arrows #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE LambdaCase #-}
19 {-# LANGUAGE QuasiQuotes #-}
20 {-# LANGUAGE TemplateHaskell #-}
21
22 module Gargantext.Database.Query.Table.NodeContext
23 ( module Gargantext.Database.Schema.NodeContext
24 , queryNodeContextTable
25 , selectDocsDates
26 , selectDocNodes
27 , selectDocs
28 , nodeContextsCategory
29 , nodeContextsScore
30 , getNodeContexts
31 , getNodeContext
32 , updateNodeContextCategory
33 , getContextsForNgrams
34 , ContextForNgrams(..)
35 , getContextsForNgramsTerms
36 , getContextNgrams
37 , getContextNgramsMatchingFTS
38 , ContextForNgramsTerms(..)
39 , insertNodeContext
40 , deleteNodeContext
41 , selectPublicContexts
42 , selectCountDocs
43 )
44 where
45
46 import Control.Arrow (returnA)
47 import Control.Lens (view, (^.))
48 import Data.Maybe (catMaybes)
49 import Data.Time (UTCTime)
50 import Data.Text (Text, splitOn)
51 import Database.PostgreSQL.Simple.SqlQQ (sql)
52 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
53 import Opaleye
54 import qualified Database.PostgreSQL.Simple as PGS (In(..), Query, Only(..))
55 import qualified Opaleye as O
56
57 import Gargantext.Core
58 import Gargantext.Core.Types
59 -- import Gargantext.Core.Types.Search (HyperdataRow(..), toHyperdataRow)
60 import Gargantext.Database.Admin.Types.Hyperdata
61 import Gargantext.Database.Query.Table.Node.Error (HasNodeError, NodeError(DoesNotExist), nodeError)
62 import Gargantext.Database.Prelude
63 import Gargantext.Prelude.Crypto.Hash (Hash)
64 import Gargantext.Database.Schema.Context
65 import Gargantext.Database.Schema.Node
66 import Gargantext.Database.Schema.NodeContext
67 import Gargantext.Prelude
68
69 queryNodeContextTable :: Select NodeContextRead
70 queryNodeContextTable = selectTable nodeContextTable
71
72 -- | not optimized (get all ngrams without filters)
73 _nodesContexts :: Cmd err [NodeContext]
74 _nodesContexts = runOpaQuery queryNodeContextTable
75
76 ------------------------------------------------------------------------
77 -- | Basic NodeContext tools
78 getNodeContexts :: NodeId -> Cmd err [NodeContext]
79 getNodeContexts n = runOpaQuery (selectNodeContexts $ pgNodeId n)
80 where
81 selectNodeContexts :: Field SqlInt4 -> Select NodeContextRead
82 selectNodeContexts n' = proc () -> do
83 ns <- queryNodeContextTable -< ()
84 restrict -< _nc_node_id ns .== n'
85 returnA -< ns
86
87
88 getNodeContext :: HasNodeError err => ContextId -> NodeId -> Cmd err NodeContext
89 getNodeContext c n = do
90 maybeNodeContext <- headMay <$> runOpaQuery (selectNodeContext (pgNodeId c) (pgNodeId n))
91 case maybeNodeContext of
92 Nothing -> nodeError (DoesNotExist c)
93 Just r -> pure r
94 where
95 selectNodeContext :: Field SqlInt4 -> Field SqlInt4 -> Select NodeContextRead
96 selectNodeContext c' n' = proc () -> do
97 ns <- queryNodeContextTable -< ()
98 restrict -< _nc_context_id ns .== c'
99 restrict -< _nc_node_id ns .== n'
100 returnA -< ns
101
102 updateNodeContextCategory :: ContextId -> NodeId -> Int -> Cmd err Int64
103 updateNodeContextCategory cId nId cat = do
104 execPGSQuery upScore (cat, cId, nId)
105 where
106 upScore :: PGS.Query
107 upScore = [sql| UPDATE nodes_contexts
108 SET category = ?
109 WHERE context_id = ?
110 AND node_id = ? |]
111
112 data ContextForNgrams =
113 ContextForNgrams { _cfn_nodeId :: NodeId
114 , _cfn_hash :: Maybe Hash
115 , _cfn_userId :: UserId
116 , _cfn_parentId :: Maybe ParentId
117 , _cfn_c_title :: ContextTitle
118 , _cfn_date :: UTCTime
119 , _cfn_hyperdata :: HyperdataDocument }
120 getContextsForNgrams :: HasNodeError err
121 => NodeId
122 -> [Int]
123 -> Cmd err [ContextForNgrams]
124 getContextsForNgrams cId ngramsIds = do
125 res <- runPGSQuery query (cId, PGS.In ngramsIds)
126 pure $ (\( _cfn_nodeId
127 , _cfn_hash
128 , _cfn_userId
129 , _cfn_parentId
130 , _cfn_c_title
131 , _cfn_date
132 , _cfn_hyperdata) -> ContextForNgrams { .. }) <$> res
133 where
134 query :: PGS.Query
135 query = [sql| SELECT contexts.id, hash_id, typename, user_id, parent_id, name, date, hyperdata
136 FROM contexts
137 JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id
138 JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id
139 WHERE nodes_contexts.node_id = ?
140 AND context_node_ngrams.ngrams_id IN ? |]
141
142 data ContextForNgramsTerms =
143 ContextForNgramsTerms { _cfnt_nodeId :: NodeId
144 , _cfnt_hash :: Maybe Hash
145 , _cfnt_nodeTypeId :: NodeTypeId
146 , _cfnt_userId :: UserId
147 , _cfnt_parentId :: Maybe ParentId
148 , _cfnt_c_title :: ContextTitle
149 , _cfnt_date :: UTCTime
150 , _cfnt_hyperdata :: HyperdataDocument
151 , _cfnt_score :: Maybe Double
152 , _cfnt_category :: Maybe Int }
153 getContextsForNgramsTerms :: HasNodeError err
154 => NodeId
155 -> [Text]
156 -> Cmd err [ContextForNgramsTerms]
157 getContextsForNgramsTerms cId ngramsTerms = do
158 res <- runPGSQuery query (cId, PGS.In ngramsTerms)
159 pure $ (\( _cfnt_nodeId
160 , _cfnt_hash
161 , _cfnt_nodeTypeId
162 , _cfnt_userId
163 , _cfnt_parentId
164 , _cfnt_c_title
165 , _cfnt_date
166 , _cfnt_hyperdata
167 , _cfnt_score
168 , _cfnt_category) -> ContextForNgramsTerms { .. }) <$> res
169 where
170 query :: PGS.Query
171 query = [sql| SELECT t.id, t.hash_id, t.typename, t.user_id, t.parent_id, t.name, t.date, t.hyperdata, t.score, t.category
172 FROM (
173 SELECT DISTINCT ON (contexts.id)
174 contexts.id AS id,
175 hash_id,
176 typename,
177 user_id,
178 parent_id,
179 name,
180 date,
181 hyperdata,
182 nodes_contexts.score AS score,
183 nodes_contexts.category AS category,
184 context_node_ngrams.doc_count AS doc_count
185 FROM contexts
186 JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id
187 JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id
188 JOIN ngrams ON context_node_ngrams.ngrams_id = ngrams.id
189 WHERE nodes_contexts.node_id = ?
190 AND ngrams.terms IN ?) t
191 ORDER BY t.doc_count DESC |]
192
193
194
195 -- | Query the `context_node_ngrams` table and return ngrams for given
196 -- `context_id` and `list_id`.
197 -- WARNING: `context_node_ngrams` can be outdated. This is because it
198 -- is expensive to keep all ngrams matching a given context and if
199 -- someone adds an ngram, we need to recompute its m2m relation to all
200 -- existing documents.
201 getContextNgrams :: HasNodeError err
202 => NodeId
203 -> NodeId
204 -> Cmd err [Text]
205 getContextNgrams contextId listId = do
206 res <- runPGSQuery query (contextId, listId)
207 pure $ (\(PGS.Only term) -> term) <$> res
208
209 where
210 query :: PGS.Query
211 query = [sql| SELECT ngrams.terms
212 FROM context_node_ngrams
213 JOIN ngrams ON ngrams.id = ngrams_id
214 WHERE context_id = ?
215 AND node_id = ? |]
216
217
218 -- | Query the `contexts` table and return ngrams for given context_id
219 -- and list_id that match the search tsvector.
220 -- NOTE This is poor man's tokenization that is used as a hint for the
221 -- frontend highlighter.
222 -- NOTE We prefer `plainto_tsquery` over `phraseto_tsquery` as it is
223 -- more permissive (i.e. ignores word ordering). See
224 -- https://www.peterullrich.com/complete-guide-to-full-text-search-with-postgres-and-ecto
225 getContextNgramsMatchingFTS :: HasNodeError err
226 => NodeId
227 -> NodeId
228 -> Cmd err [Text]
229 getContextNgramsMatchingFTS contextId listId = do
230 res <- runPGSQuery query (listId, contextId)
231 pure $ (\(PGS.Only term) -> term) <$> res
232
233 where
234 query :: PGS.Query
235 query = [sql| WITH constants AS
236 (SELECT ? AS list_id, ? AS context_id),
237 ngrams_ids AS
238 (SELECT ngrams_id
239 FROM node_stories
240 CROSS JOIN constants
241 WHERE node_id = constants.list_id
242 UNION SELECT ngrams_id
243 FROM node_ngrams
244 CROSS JOIN constants
245 WHERE node_id = constants.list_id)
246 SELECT DISTINCT ngrams.terms
247 FROM ngrams
248 JOIN ngrams_ids ON ngrams_ids.ngrams_id = ngrams.id
249 CROSS JOIN constants
250 -- JOIN node_ngrams ON node_ngrams.ngrams_id = ngrams.id
251 CROSS JOIN contexts
252 WHERE contexts.id = constants.context_id
253 -- AND node_ngrams.node_id = ?
254 AND (contexts.search @@ plainto_tsquery(ngrams.terms)
255 OR contexts.search @@ plainto_tsquery('french', ngrams.terms)) |]
256 ------------------------------------------------------------------------
257 insertNodeContext :: [NodeContext] -> Cmd err Int
258 insertNodeContext ns = mkCmd $ \conn -> fromIntegral <$> (runInsert_ conn
259 $ Insert nodeContextTable ns' rCount (Just DoNothing))
260 where
261 ns' :: [NodeContextWrite]
262 ns' = map (\(NodeContext i n c x y)
263 -> NodeContext (sqlInt4 <$> i)
264 (pgNodeId n)
265 (pgNodeId c)
266 (sqlDouble <$> x)
267 (sqlInt4 <$> y)
268 ) ns
269
270
271 ------------------------------------------------------------------------
272 type Node_Id = NodeId
273 type Context_Id = NodeId
274
275 deleteNodeContext :: Node_Id -> Context_Id -> Cmd err Int
276 deleteNodeContext n c = mkCmd $ \conn ->
277 fromIntegral <$> runDelete_ conn
278 (Delete nodeContextTable
279 (\(NodeContext _ n_id c_id _ _) -> n_id .== pgNodeId n
280 .&& c_id .== pgNodeId c
281 )
282 rCount
283 )
284
285 ------------------------------------------------------------------------
286 -- | Favorite management
287 nodeContextsCategory :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
288 nodeContextsCategory inputData = map (\(PGS.Only a) -> a)
289 <$> runPGSQuery catSelect (PGS.Only $ Values fields inputData)
290 where
291 fields = map (QualifiedIdentifier Nothing) ["int4","int4","int4"]
292 catSelect :: PGS.Query
293 catSelect = [sql| UPDATE nodes_contexts as nn0
294 SET category = nn1.category
295 FROM (?) as nn1(node_id,context_id,category)
296 WHERE nn0.node_id = nn1.node_id
297 AND nn0.context_id = nn1.context_id
298 RETURNING nn1.node_id
299 |]
300
301 ------------------------------------------------------------------------
302 -- | Score management
303 nodeContextsScore :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
304 nodeContextsScore inputData = map (\(PGS.Only a) -> a)
305 <$> runPGSQuery catScore (PGS.Only $ Values fields inputData)
306 where
307 fields = map (QualifiedIdentifier Nothing) ["int4","int4","int4"]
308 catScore :: PGS.Query
309 catScore = [sql| UPDATE nodes_contexts as nn0
310 SET score = nn1.score
311 FROM (?) as nn1(node_id, context_id, score)
312 WHERE nn0.node_id = nn1.node_id
313 AND nn0.context_id = nn1.context_id
314 RETURNING nn1.context_id
315 |]
316
317
318 ------------------------------------------------------------------------
319 selectCountDocs :: HasDBid NodeType => CorpusId -> Cmd err Int
320 selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
321 where
322 queryCountDocs cId' = proc () -> do
323 (c, nc) <- joinInCorpus -< ()
324 restrict -< restrictMaybe nc $ \nc' -> (nc' ^. nc_node_id) .== pgNodeId cId' .&&
325 (nc' ^. nc_category) .>= sqlInt4 1
326 restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
327 returnA -< c
328
329
330 -- | TODO use UTCTime fast
331 selectDocsDates :: HasDBid NodeType => CorpusId -> Cmd err [Text]
332 selectDocsDates cId = map (head' "selectDocsDates" . splitOn "-")
333 <$> catMaybes
334 <$> map (view hd_publication_date)
335 <$> selectDocs cId
336
337 selectDocs :: HasDBid NodeType => CorpusId -> Cmd err [HyperdataDocument]
338 selectDocs cId = runOpaQuery (queryDocs cId)
339
340 queryDocs :: HasDBid NodeType => CorpusId -> O.Select (Field SqlJsonb)
341 queryDocs cId = proc () -> do
342 (c, nn) <- joinInCorpus -< ()
343 restrict -< restrictMaybe nn $ \nn' -> (nn' ^. nc_node_id) .== pgNodeId cId .&&
344 (nn' ^. nc_category) .>= sqlInt4 1
345 restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
346 returnA -< view (context_hyperdata) c
347
348 selectDocNodes :: HasDBid NodeType => CorpusId -> Cmd err [Context HyperdataDocument]
349 selectDocNodes cId = runOpaQuery (queryDocNodes cId)
350
351 queryDocNodes :: HasDBid NodeType => CorpusId -> O.Select ContextRead
352 queryDocNodes cId = proc () -> do
353 (c, nc) <- joinInCorpus -< ()
354 -- restrict -< restrictMaybe nc $ \nc' -> (nc' ^. nc_node_id) .== pgNodeId cId .&&
355 -- (nc' ^. nc_category) .>= sqlInt4 1
356 restrict -< matchMaybe nc $ \case
357 Nothing -> toFields True
358 Just nc' -> (nc' ^. nc_node_id) .== pgNodeId cId .&&
359 (nc' ^. nc_category) .>= sqlInt4 1
360 restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
361 returnA -< c
362
363 joinInCorpus :: O.Select (ContextRead, MaybeFields NodeContextRead)
364 joinInCorpus = proc () -> do
365 c <- queryContextTable -< ()
366 nc <- optionalRestrict queryNodeContextTable -<
367 (\nc' -> (c ^. context_id) .== (nc' ^. nc_context_id))
368 returnA -< (c, nc)
369
370
371 joinOn1 :: O.Select (NodeRead, MaybeFields NodeContextRead)
372 joinOn1 = proc () -> do
373 n <- queryNodeTable -< ()
374 nc <- optionalRestrict queryNodeContextTable -<
375 (\nc' -> (nc' ^. nc_node_id) .== (n ^. node_id))
376 returnA -< (n, nc)
377
378
379 ------------------------------------------------------------------------
380 selectPublicContexts :: HasDBid NodeType => (Hyperdata a, DefaultFromField SqlJsonb a)
381 => Cmd err [(Node a, Maybe Int)]
382 selectPublicContexts = runOpaQuery (queryWithType NodeFolderPublic)
383
384 queryWithType :: HasDBid NodeType => NodeType -> O.Select (NodeRead, MaybeFields (Field SqlInt4))
385 queryWithType nt = proc () -> do
386 (n, nc) <- joinOn1 -< ()
387 restrict -< (n ^. node_typename) .== sqlInt4 (toDBid nt)
388 returnA -< (n, view nc_context_id <$> nc)