src/Gargantext/Database/Query/Table/NodeContext.hs

   1 {-|
   2 Module      : Gargantext.Database.Query.Table.NodeContext
   3 Description :
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 Here is a longer description of this module, containing some
  11 commentary with @some markup@.
  12 -}
  13
  14 {-# OPTIONS_GHC -fno-warn-orphans #-}
  15
  16 {-# LANGUAGE Arrows                 #-}
  17 {-# LANGUAGE FunctionalDependencies #-}
  18 {-# LANGUAGE LambdaCase             #-}
  19 {-# LANGUAGE QuasiQuotes            #-}
  20 {-# LANGUAGE TemplateHaskell        #-}
  21
  22 module Gargantext.Database.Query.Table.NodeContext
  23   ( module Gargantext.Database.Schema.NodeContext
  24   , queryNodeContextTable
  25   , selectDocsDates
  26   , selectDocNodes
  27   , selectDocs
  28   , nodeContextsCategory
  29   , nodeContextsScore
  30   , getNodeContexts
  31   , getNodeContext
  32   , updateNodeContextCategory
  33   , getContextsForNgrams
  34   , ContextForNgrams(..)
  35   , getContextsForNgramsTerms
  36   , getContextNgrams
  37   , getContextNgramsMatchingFTS
  38   , ContextForNgramsTerms(..)
  39   , insertNodeContext
  40   , deleteNodeContext
  41   , selectPublicContexts
  42   , selectCountDocs
  43   )
  44   where
  45
  46 import Control.Arrow (returnA)
  47 import Control.Lens (view, (^.))
  48 import Data.Maybe (catMaybes)
  49 import Data.Time (UTCTime)
  50 import Data.Text (Text, splitOn)
  51 import Database.PostgreSQL.Simple.SqlQQ (sql)
  52 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
  53 import Opaleye
  54 import qualified Database.PostgreSQL.Simple as PGS (In(..), Query, Only(..))
  55 import qualified Opaleye as O
  56
  57 import Gargantext.Core
  58 import Gargantext.Core.Types
  59 -- import Gargantext.Core.Types.Search (HyperdataRow(..), toHyperdataRow)
  60 import Gargantext.Database.Admin.Types.Hyperdata
  61 import Gargantext.Database.Query.Table.Node.Error (HasNodeError, NodeError(DoesNotExist), nodeError)
  62 import Gargantext.Database.Prelude
  63 import Gargantext.Prelude.Crypto.Hash (Hash)
  64 import Gargantext.Database.Schema.Context
  65 import Gargantext.Database.Schema.Node
  66 import Gargantext.Database.Schema.NodeContext
  67 import Gargantext.Prelude
  68
  69 queryNodeContextTable :: Select NodeContextRead
  70 queryNodeContextTable = selectTable nodeContextTable
  71
  72 -- | not optimized (get all ngrams without filters)
  73 _nodesContexts :: Cmd err [NodeContext]
  74 _nodesContexts = runOpaQuery queryNodeContextTable
  75
  76 ------------------------------------------------------------------------
  77 -- | Basic NodeContext tools
  78 getNodeContexts :: NodeId -> Cmd err [NodeContext]
  79 getNodeContexts n = runOpaQuery (selectNodeContexts $ pgNodeId n)
  80   where
  81     selectNodeContexts :: Field SqlInt4 -> Select NodeContextRead
  82     selectNodeContexts n' = proc () -> do
  83       ns <- queryNodeContextTable -< ()
  84       restrict -< _nc_node_id ns .== n'
  85       returnA -< ns
  86
  87
  88 getNodeContext :: HasNodeError err => ContextId -> NodeId -> Cmd err NodeContext
  89 getNodeContext c n = do
  90   maybeNodeContext <- headMay <$> runOpaQuery (selectNodeContext (pgNodeId c) (pgNodeId n))
  91   case maybeNodeContext of
  92     Nothing -> nodeError (DoesNotExist c)
  93     Just  r -> pure r
  94   where
  95     selectNodeContext :: Field SqlInt4 -> Field SqlInt4 -> Select NodeContextRead
  96     selectNodeContext c' n' = proc () -> do
  97       ns <- queryNodeContextTable -< ()
  98       restrict -< _nc_context_id ns .== c'
  99       restrict -< _nc_node_id ns .== n'
 100       returnA -< ns
 101
 102 updateNodeContextCategory :: ContextId -> NodeId -> Int -> Cmd err Int64
 103 updateNodeContextCategory cId nId cat = do
 104   execPGSQuery upScore (cat, cId, nId)
 105   where
 106     upScore :: PGS.Query
 107     upScore = [sql| UPDATE nodes_contexts
 108                       SET category = ?
 109                       WHERE context_id = ?
 110                       AND node_id = ? |]
 111
 112 data ContextForNgrams =
 113   ContextForNgrams { _cfn_nodeId    :: NodeId
 114                    , _cfn_hash      :: Maybe Hash
 115                    , _cfn_userId    :: UserId
 116                    , _cfn_parentId  :: Maybe ParentId
 117                    , _cfn_c_title   :: ContextTitle
 118                    , _cfn_date      :: UTCTime
 119                    , _cfn_hyperdata :: HyperdataDocument }
 120 getContextsForNgrams :: HasNodeError err
 121                      => NodeId
 122                      -> [Int]
 123                      -> Cmd err [ContextForNgrams]
 124 getContextsForNgrams cId ngramsIds = do
 125   res <- runPGSQuery query (cId, PGS.In ngramsIds)
 126   pure $ (\( _cfn_nodeId
 127            , _cfn_hash
 128            , _cfn_userId
 129            , _cfn_parentId
 130            , _cfn_c_title
 131            , _cfn_date
 132            , _cfn_hyperdata) -> ContextForNgrams { .. }) <$> res
 133   where
 134     query :: PGS.Query
 135     query = [sql| SELECT contexts.id, hash_id, typename, user_id, parent_id, name, date, hyperdata
 136                   FROM contexts
 137                   JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id
 138                   JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id
 139                   WHERE nodes_contexts.node_id = ?
 140                    AND context_node_ngrams.ngrams_id IN ? |]
 141
 142 data ContextForNgramsTerms =
 143   ContextForNgramsTerms { _cfnt_nodeId     :: NodeId
 144                         , _cfnt_hash       :: Maybe Hash
 145                         , _cfnt_nodeTypeId :: NodeTypeId
 146                         , _cfnt_userId     :: UserId
 147                         , _cfnt_parentId   :: Maybe ParentId
 148                         , _cfnt_c_title    :: ContextTitle
 149                         , _cfnt_date       :: UTCTime
 150                         , _cfnt_hyperdata  :: HyperdataDocument
 151                         , _cfnt_score      :: Maybe Double
 152                         , _cfnt_category   :: Maybe Int }
 153 getContextsForNgramsTerms :: HasNodeError err
 154                           => NodeId
 155                           -> [Text]
 156                           -> Cmd err [ContextForNgramsTerms]
 157 getContextsForNgramsTerms cId ngramsTerms = do
 158   res <- runPGSQuery query (cId, PGS.In ngramsTerms)
 159   pure $ (\( _cfnt_nodeId
 160            , _cfnt_hash
 161            , _cfnt_nodeTypeId
 162            , _cfnt_userId
 163            , _cfnt_parentId
 164            , _cfnt_c_title
 165            , _cfnt_date
 166            , _cfnt_hyperdata
 167            , _cfnt_score
 168            , _cfnt_category) -> ContextForNgramsTerms { .. }) <$> res
 169   where
 170     query :: PGS.Query
 171     query = [sql| SELECT t.id, t.hash_id, t.typename, t.user_id, t.parent_id, t.name, t.date, t.hyperdata, t.score, t.category
 172                 FROM (
 173                   SELECT DISTINCT ON (contexts.id)
 174                        contexts.id AS id,
 175                        hash_id,
 176                        typename,
 177                        user_id,
 178                        parent_id,
 179                        name,
 180                        date,
 181                        hyperdata,
 182                        nodes_contexts.score AS score,
 183                        nodes_contexts.category AS category,
 184                        context_node_ngrams.doc_count AS doc_count
 185                     FROM contexts
 186                     JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id
 187                     JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id
 188                     JOIN ngrams ON context_node_ngrams.ngrams_id = ngrams.id
 189                     WHERE nodes_contexts.node_id = ?
 190                      AND ngrams.terms IN ?) t
 191                    ORDER BY t.doc_count DESC |]
 192
 193
 194
 195 -- | Query the `context_node_ngrams` table and return ngrams for given
 196 -- `context_id` and `list_id`.
 197 -- WARNING: `context_node_ngrams` can be outdated. This is because it
 198 -- is expensive to keep all ngrams matching a given context and if
 199 -- someone adds an ngram, we need to recompute its m2m relation to all
 200 -- existing documents.
 201 getContextNgrams :: HasNodeError err
 202                  => NodeId
 203                  -> NodeId
 204                  -> Cmd err [Text]
 205 getContextNgrams contextId listId = do
 206   res <- runPGSQuery query (contextId, listId)
 207   pure $ (\(PGS.Only term) -> term) <$> res
 208
 209   where
 210     query :: PGS.Query
 211     query = [sql| SELECT ngrams.terms
 212                 FROM context_node_ngrams
 213                 JOIN ngrams ON ngrams.id = ngrams_id
 214                 WHERE context_id = ?
 215                 AND node_id = ? |]
 216
 217
 218 -- | Query the `contexts` table and return ngrams for given context_id
 219 -- and list_id that match the search tsvector.
 220 -- NOTE This is poor man's tokenization that is used as a hint for the
 221 -- frontend highlighter.
 222 -- NOTE We prefer `plainto_tsquery` over `phraseto_tsquery` as it is
 223 -- more permissive (i.e. ignores word ordering). See
 224 -- https://www.peterullrich.com/complete-guide-to-full-text-search-with-postgres-and-ecto
 225 getContextNgramsMatchingFTS :: HasNodeError err
 226                             => NodeId
 227                             -> NodeId
 228                             -> Cmd err [Text]
 229 getContextNgramsMatchingFTS contextId listId = do
 230   res <- runPGSQuery query (listId, listId, contextId)
 231   pure $ (\(PGS.Only term) -> term) <$> res
 232
 233   where
 234     query :: PGS.Query
 235     query = [sql| WITH ngrams_ids AS
 236                 (SELECT ngrams_id
 237                  FROM node_stories
 238                  WHERE node_id = ?
 239                  UNION SELECT ngrams_id
 240                  FROM node_ngrams
 241                  WHERE node_id = ?)
 242                 SELECT DISTINCT ngrams.terms
 243                 FROM ngrams
 244                 JOIN ngrams_ids ON ngrams_ids.ngrams_id = ngrams.id
 245                 -- JOIN node_ngrams ON node_ngrams.ngrams_id = ngrams.id
 246                 CROSS JOIN contexts
 247                 WHERE contexts.id = ?
 248                 -- AND node_ngrams.node_id = ?
 249                 AND (contexts.search @@ plainto_tsquery(ngrams.terms)
 250                   OR contexts.search @@ plainto_tsquery('french', ngrams.terms)) |]
 251 ------------------------------------------------------------------------
 252 insertNodeContext :: [NodeContext] -> Cmd err Int
 253 insertNodeContext ns = mkCmd $ \conn -> fromIntegral <$> (runInsert_ conn
 254                           $ Insert nodeContextTable ns' rCount (Just DoNothing))
 255   where
 256     ns' :: [NodeContextWrite]
 257     ns' = map (\(NodeContext i n c x y)
 258                 -> NodeContext (sqlInt4 <$> i)
 259                                (pgNodeId n)
 260                                (pgNodeId c)
 261                                (sqlDouble <$> x)
 262                                (sqlInt4  <$> y)
 263               ) ns
 264
 265
 266 ------------------------------------------------------------------------
 267 type Node_Id    = NodeId
 268 type Context_Id = NodeId
 269
 270 deleteNodeContext :: Node_Id -> Context_Id -> Cmd err Int
 271 deleteNodeContext n c = mkCmd $ \conn ->
 272   fromIntegral <$> runDelete_ conn
 273                               (Delete nodeContextTable
 274                                       (\(NodeContext _ n_id c_id _ _) -> n_id .== pgNodeId n
 275                                                                    .&& c_id .== pgNodeId c
 276                                       )
 277                                       rCount
 278                               )
 279
 280 ------------------------------------------------------------------------
 281 -- | Favorite management
 282 nodeContextsCategory :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
 283 nodeContextsCategory inputData = map (\(PGS.Only a) -> a)
 284                             <$> runPGSQuery catSelect (PGS.Only $ Values fields inputData)
 285   where
 286     fields = map (QualifiedIdentifier Nothing) ["int4","int4","int4"]
 287     catSelect :: PGS.Query
 288     catSelect = [sql| UPDATE nodes_contexts as nn0
 289                       SET category = nn1.category
 290                        FROM (?) as nn1(node_id,context_id,category)
 291                        WHERE nn0.node_id    = nn1.node_id
 292                        AND   nn0.context_id = nn1.context_id
 293                        RETURNING nn1.node_id
 294                   |]
 295
 296 ------------------------------------------------------------------------
 297 -- | Score management
 298 nodeContextsScore :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
 299 nodeContextsScore inputData = map (\(PGS.Only a) -> a)
 300                             <$> runPGSQuery catScore (PGS.Only $ Values fields inputData)
 301   where
 302     fields = map (QualifiedIdentifier Nothing) ["int4","int4","int4"]
 303     catScore :: PGS.Query
 304     catScore = [sql| UPDATE nodes_contexts as nn0
 305                       SET score = nn1.score
 306                        FROM (?) as nn1(node_id, context_id, score)
 307                        WHERE nn0.node_id    = nn1.node_id
 308                        AND   nn0.context_id = nn1.context_id
 309                        RETURNING nn1.context_id
 310                   |]
 311
 312
 313 ------------------------------------------------------------------------
 314 selectCountDocs :: HasDBid NodeType => CorpusId -> Cmd err Int
 315 selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
 316   where
 317     queryCountDocs cId' = proc () -> do
 318       (c, nc) <- joinInCorpus -< ()
 319       restrict -< restrictMaybe nc $ \nc' -> (nc' ^. nc_node_id)  .== pgNodeId cId' .&&
 320                                              (nc' ^. nc_category) .>= sqlInt4 1
 321       restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
 322       returnA  -< c
 323
 324
 325 -- | TODO use UTCTime fast
 326 selectDocsDates :: HasDBid NodeType => CorpusId -> Cmd err [Text]
 327 selectDocsDates cId =  map (head' "selectDocsDates" . splitOn "-")
 328                    <$> catMaybes
 329                    <$> map (view hd_publication_date)
 330                    <$> selectDocs cId
 331
 332 selectDocs :: HasDBid NodeType => CorpusId -> Cmd err [HyperdataDocument]
 333 selectDocs cId = runOpaQuery (queryDocs cId)
 334
 335 queryDocs :: HasDBid NodeType => CorpusId -> O.Select (Field SqlJsonb)
 336 queryDocs cId = proc () -> do
 337   (c, nn)  <- joinInCorpus -< ()
 338   restrict -< restrictMaybe nn $ \nn' -> (nn' ^. nc_node_id)  .== pgNodeId cId .&&
 339                                          (nn' ^. nc_category) .>= sqlInt4 1
 340   restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
 341   returnA  -< view (context_hyperdata) c
 342
 343 selectDocNodes :: HasDBid NodeType => CorpusId -> Cmd err [Context HyperdataDocument]
 344 selectDocNodes cId = runOpaQuery (queryDocNodes cId)
 345
 346 queryDocNodes :: HasDBid NodeType => CorpusId -> O.Select ContextRead
 347 queryDocNodes cId = proc () -> do
 348   (c, nc) <- joinInCorpus -< ()
 349   -- restrict -< restrictMaybe nc $ \nc' -> (nc' ^. nc_node_id)  .== pgNodeId cId .&&
 350   --                                        (nc' ^. nc_category) .>= sqlInt4 1
 351   restrict -< matchMaybe nc $ \case
 352     Nothing  -> toFields True
 353     Just nc' -> (nc' ^. nc_node_id)  .== pgNodeId cId .&&
 354                 (nc' ^. nc_category) .>= sqlInt4 1
 355   restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
 356   returnA -<  c
 357
 358 joinInCorpus :: O.Select (ContextRead, MaybeFields NodeContextRead)
 359 joinInCorpus = proc () -> do
 360   c <- queryContextTable -< ()
 361   nc <- optionalRestrict queryNodeContextTable -<
 362     (\nc' -> (c ^. context_id) .== (nc' ^. nc_context_id))
 363   returnA -< (c, nc)
 364
 365
 366 joinOn1 :: O.Select (NodeRead, MaybeFields NodeContextRead)
 367 joinOn1 = proc () -> do
 368   n <- queryNodeTable -< ()
 369   nc <- optionalRestrict queryNodeContextTable -<
 370     (\nc' -> (nc' ^. nc_node_id) .== (n ^. node_id))
 371   returnA -< (n, nc)
 372
 373
 374 ------------------------------------------------------------------------
 375 selectPublicContexts :: HasDBid NodeType => (Hyperdata a, DefaultFromField SqlJsonb a)
 376                   => Cmd err [(Node a, Maybe Int)]
 377 selectPublicContexts = runOpaQuery (queryWithType NodeFolderPublic)
 378
 379 queryWithType :: HasDBid NodeType => NodeType -> O.Select (NodeRead, MaybeFields (Field SqlInt4))
 380 queryWithType nt = proc () -> do
 381   (n, nc) <- joinOn1 -< ()
 382   restrict -< (n ^. node_typename) .== sqlInt4 (toDBid nt)
 383   returnA  -<  (n, view nc_context_id <$> nc)