2 Module : Gargantext.Database.Query.Table.NodeContext
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
16 {-# LANGUAGE Arrows #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE LambdaCase #-}
19 {-# LANGUAGE QuasiQuotes #-}
20 {-# LANGUAGE TemplateHaskell #-}
22 module Gargantext.Database.Query.Table.NodeContext
23 ( module Gargantext.Database.Schema.NodeContext
24 , queryNodeContextTable
28 , nodeContextsCategory
32 , updateNodeContextCategory
33 , getContextsForNgrams
34 , ContextForNgrams(..)
35 , getContextsForNgramsTerms
37 , getContextNgramsMatchingFTS
38 , ContextForNgramsTerms(..)
41 , selectPublicContexts
46 import Control.Arrow (returnA)
47 import Control.Lens (view, (^.))
48 import Data.Maybe (catMaybes)
49 import Data.Time (UTCTime)
50 import Data.Text (Text, splitOn)
51 import Database.PostgreSQL.Simple.SqlQQ (sql)
52 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
54 import qualified Database.PostgreSQL.Simple as PGS (In(..), Query, Only(..))
55 import qualified Opaleye as O
57 import Gargantext.Core
58 import Gargantext.Core.Types
59 -- import Gargantext.Core.Types.Search (HyperdataRow(..), toHyperdataRow)
60 import Gargantext.Database.Admin.Types.Hyperdata
61 import Gargantext.Database.Query.Table.Node.Error (HasNodeError, NodeError(DoesNotExist), nodeError)
62 import Gargantext.Database.Prelude
63 import Gargantext.Prelude.Crypto.Hash (Hash)
64 import Gargantext.Database.Schema.Context
65 import Gargantext.Database.Schema.Node
66 import Gargantext.Database.Schema.NodeContext
67 import Gargantext.Prelude
69 queryNodeContextTable :: Select NodeContextRead
70 queryNodeContextTable = selectTable nodeContextTable
72 -- | not optimized (get all ngrams without filters)
73 _nodesContexts :: Cmd err [NodeContext]
74 _nodesContexts = runOpaQuery queryNodeContextTable
76 ------------------------------------------------------------------------
77 -- | Basic NodeContext tools
78 getNodeContexts :: NodeId -> Cmd err [NodeContext]
79 getNodeContexts n = runOpaQuery (selectNodeContexts $ pgNodeId n)
81 selectNodeContexts :: Field SqlInt4 -> Select NodeContextRead
82 selectNodeContexts n' = proc () -> do
83 ns <- queryNodeContextTable -< ()
84 restrict -< _nc_node_id ns .== n'
88 getNodeContext :: HasNodeError err => ContextId -> NodeId -> Cmd err NodeContext
89 getNodeContext c n = do
90 maybeNodeContext <- headMay <$> runOpaQuery (selectNodeContext (pgNodeId c) (pgNodeId n))
91 case maybeNodeContext of
92 Nothing -> nodeError (DoesNotExist c)
95 selectNodeContext :: Field SqlInt4 -> Field SqlInt4 -> Select NodeContextRead
96 selectNodeContext c' n' = proc () -> do
97 ns <- queryNodeContextTable -< ()
98 restrict -< _nc_context_id ns .== c'
99 restrict -< _nc_node_id ns .== n'
102 updateNodeContextCategory :: ContextId -> NodeId -> Int -> Cmd err Int64
103 updateNodeContextCategory cId nId cat = do
104 execPGSQuery upScore (cat, cId, nId)
107 upScore = [sql| UPDATE nodes_contexts
112 data ContextForNgrams =
113 ContextForNgrams { _cfn_nodeId :: NodeId
114 , _cfn_hash :: Maybe Hash
115 , _cfn_userId :: UserId
116 , _cfn_parentId :: Maybe ParentId
117 , _cfn_c_title :: ContextTitle
118 , _cfn_date :: UTCTime
119 , _cfn_hyperdata :: HyperdataDocument }
120 getContextsForNgrams :: HasNodeError err
123 -> Cmd err [ContextForNgrams]
124 getContextsForNgrams cId ngramsIds = do
125 res <- runPGSQuery query (cId, PGS.In ngramsIds)
126 pure $ (\( _cfn_nodeId
132 , _cfn_hyperdata) -> ContextForNgrams { .. }) <$> res
135 query = [sql| SELECT contexts.id, hash_id, typename, user_id, parent_id, name, date, hyperdata
137 JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id
138 JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id
139 WHERE nodes_contexts.node_id = ?
140 AND context_node_ngrams.ngrams_id IN ? |]
142 data ContextForNgramsTerms =
143 ContextForNgramsTerms { _cfnt_nodeId :: NodeId
144 , _cfnt_hash :: Maybe Hash
145 , _cfnt_nodeTypeId :: NodeTypeId
146 , _cfnt_userId :: UserId
147 , _cfnt_parentId :: Maybe ParentId
148 , _cfnt_c_title :: ContextTitle
149 , _cfnt_date :: UTCTime
150 , _cfnt_hyperdata :: HyperdataDocument
151 , _cfnt_score :: Maybe Double
152 , _cfnt_category :: Maybe Int }
153 getContextsForNgramsTerms :: HasNodeError err
156 -> Cmd err [ContextForNgramsTerms]
157 getContextsForNgramsTerms cId ngramsTerms = do
158 res <- runPGSQuery query (cId, PGS.In ngramsTerms)
159 pure $ (\( _cfnt_nodeId
168 , _cfnt_category) -> ContextForNgramsTerms { .. }) <$> res
171 query = [sql| SELECT t.id, t.hash_id, t.typename, t.user_id, t.parent_id, t.name, t.date, t.hyperdata, t.score, t.category
173 SELECT DISTINCT ON (contexts.id)
182 nodes_contexts.score AS score,
183 nodes_contexts.category AS category,
184 context_node_ngrams.doc_count AS doc_count
186 JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id
187 JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id
188 JOIN ngrams ON context_node_ngrams.ngrams_id = ngrams.id
189 WHERE nodes_contexts.node_id = ?
190 AND ngrams.terms IN ?) t
191 ORDER BY t.doc_count DESC |]
195 -- | Query the `context_node_ngrams` table and return ngrams for given
196 -- `context_id` and `list_id`.
197 -- WARNING: `context_node_ngrams` can be outdated. This is because it
198 -- is expensive to keep all ngrams matching a given context and if
199 -- someone adds an ngram, we need to recompute its m2m relation to all
200 -- existing documents.
201 getContextNgrams :: HasNodeError err
205 getContextNgrams contextId listId = do
206 res <- runPGSQuery query (contextId, listId)
207 pure $ (\(PGS.Only term) -> term) <$> res
211 query = [sql| SELECT ngrams.terms
212 FROM context_node_ngrams
213 JOIN ngrams ON ngrams.id = ngrams_id
218 -- | Query the `contexts` table and return ngrams for given context_id
219 -- and list_id that match the search tsvector.
220 -- NOTE This is poor man's tokenization that is used as a hint for the
221 -- frontend highlighter.
222 -- NOTE We prefer `plainto_tsquery` over `phraseto_tsquery` as it is
223 -- more permissive (i.e. ignores word ordering). See
224 -- https://www.peterullrich.com/complete-guide-to-full-text-search-with-postgres-and-ecto
225 getContextNgramsMatchingFTS :: HasNodeError err
229 getContextNgramsMatchingFTS contextId listId = do
230 res <- runPGSQuery query (listId, listId, contextId)
231 pure $ (\(PGS.Only term) -> term) <$> res
235 query = [sql| WITH ngrams_ids AS
239 UNION SELECT ngrams_id
242 SELECT DISTINCT ngrams.terms
244 JOIN ngrams_ids ON ngrams_ids.ngrams_id = ngrams.id
245 -- JOIN node_ngrams ON node_ngrams.ngrams_id = ngrams.id
247 WHERE contexts.id = ?
248 -- AND node_ngrams.node_id = ?
249 AND (contexts.search @@ plainto_tsquery(ngrams.terms)
250 OR contexts.search @@ plainto_tsquery('french', ngrams.terms)) |]
251 ------------------------------------------------------------------------
252 insertNodeContext :: [NodeContext] -> Cmd err Int
253 insertNodeContext ns = mkCmd $ \conn -> fromIntegral <$> (runInsert_ conn
254 $ Insert nodeContextTable ns' rCount (Just DoNothing))
256 ns' :: [NodeContextWrite]
257 ns' = map (\(NodeContext i n c x y)
258 -> NodeContext (sqlInt4 <$> i)
266 ------------------------------------------------------------------------
267 type Node_Id = NodeId
268 type Context_Id = NodeId
270 deleteNodeContext :: Node_Id -> Context_Id -> Cmd err Int
271 deleteNodeContext n c = mkCmd $ \conn ->
272 fromIntegral <$> runDelete_ conn
273 (Delete nodeContextTable
274 (\(NodeContext _ n_id c_id _ _) -> n_id .== pgNodeId n
275 .&& c_id .== pgNodeId c
280 ------------------------------------------------------------------------
281 -- | Favorite management
282 nodeContextsCategory :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
283 nodeContextsCategory inputData = map (\(PGS.Only a) -> a)
284 <$> runPGSQuery catSelect (PGS.Only $ Values fields inputData)
286 fields = map (QualifiedIdentifier Nothing) ["int4","int4","int4"]
287 catSelect :: PGS.Query
288 catSelect = [sql| UPDATE nodes_contexts as nn0
289 SET category = nn1.category
290 FROM (?) as nn1(node_id,context_id,category)
291 WHERE nn0.node_id = nn1.node_id
292 AND nn0.context_id = nn1.context_id
293 RETURNING nn1.node_id
296 ------------------------------------------------------------------------
297 -- | Score management
298 nodeContextsScore :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
299 nodeContextsScore inputData = map (\(PGS.Only a) -> a)
300 <$> runPGSQuery catScore (PGS.Only $ Values fields inputData)
302 fields = map (QualifiedIdentifier Nothing) ["int4","int4","int4"]
303 catScore :: PGS.Query
304 catScore = [sql| UPDATE nodes_contexts as nn0
305 SET score = nn1.score
306 FROM (?) as nn1(node_id, context_id, score)
307 WHERE nn0.node_id = nn1.node_id
308 AND nn0.context_id = nn1.context_id
309 RETURNING nn1.context_id
313 ------------------------------------------------------------------------
314 selectCountDocs :: HasDBid NodeType => CorpusId -> Cmd err Int
315 selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
317 queryCountDocs cId' = proc () -> do
318 (c, nc) <- joinInCorpus -< ()
319 restrict -< restrictMaybe nc $ \nc' -> (nc' ^. nc_node_id) .== pgNodeId cId' .&&
320 (nc' ^. nc_category) .>= sqlInt4 1
321 restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
325 -- | TODO use UTCTime fast
326 selectDocsDates :: HasDBid NodeType => CorpusId -> Cmd err [Text]
327 selectDocsDates cId = map (head' "selectDocsDates" . splitOn "-")
329 <$> map (view hd_publication_date)
332 selectDocs :: HasDBid NodeType => CorpusId -> Cmd err [HyperdataDocument]
333 selectDocs cId = runOpaQuery (queryDocs cId)
335 queryDocs :: HasDBid NodeType => CorpusId -> O.Select (Field SqlJsonb)
336 queryDocs cId = proc () -> do
337 (c, nn) <- joinInCorpus -< ()
338 restrict -< restrictMaybe nn $ \nn' -> (nn' ^. nc_node_id) .== pgNodeId cId .&&
339 (nn' ^. nc_category) .>= sqlInt4 1
340 restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
341 returnA -< view (context_hyperdata) c
343 selectDocNodes :: HasDBid NodeType => CorpusId -> Cmd err [Context HyperdataDocument]
344 selectDocNodes cId = runOpaQuery (queryDocNodes cId)
346 queryDocNodes :: HasDBid NodeType => CorpusId -> O.Select ContextRead
347 queryDocNodes cId = proc () -> do
348 (c, nc) <- joinInCorpus -< ()
349 -- restrict -< restrictMaybe nc $ \nc' -> (nc' ^. nc_node_id) .== pgNodeId cId .&&
350 -- (nc' ^. nc_category) .>= sqlInt4 1
351 restrict -< matchMaybe nc $ \case
352 Nothing -> toFields True
353 Just nc' -> (nc' ^. nc_node_id) .== pgNodeId cId .&&
354 (nc' ^. nc_category) .>= sqlInt4 1
355 restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
358 joinInCorpus :: O.Select (ContextRead, MaybeFields NodeContextRead)
359 joinInCorpus = proc () -> do
360 c <- queryContextTable -< ()
361 nc <- optionalRestrict queryNodeContextTable -<
362 (\nc' -> (c ^. context_id) .== (nc' ^. nc_context_id))
366 joinOn1 :: O.Select (NodeRead, MaybeFields NodeContextRead)
367 joinOn1 = proc () -> do
368 n <- queryNodeTable -< ()
369 nc <- optionalRestrict queryNodeContextTable -<
370 (\nc' -> (nc' ^. nc_node_id) .== (n ^. node_id))
374 ------------------------------------------------------------------------
375 selectPublicContexts :: HasDBid NodeType => (Hyperdata a, DefaultFromField SqlJsonb a)
376 => Cmd err [(Node a, Maybe Int)]
377 selectPublicContexts = runOpaQuery (queryWithType NodeFolderPublic)
379 queryWithType :: HasDBid NodeType => NodeType -> O.Select (NodeRead, MaybeFields (Field SqlInt4))
380 queryWithType nt = proc () -> do
381 (n, nc) <- joinOn1 -< ()
382 restrict -< (n ^. node_typename) .== sqlInt4 (toDBid nt)
383 returnA -< (n, view nc_context_id <$> nc)