2 Module : Gargantext.Database.Schema.NodeNgramsNgrams
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 NodeNgramsNgrams table is used to group Ngrams
12 - NgramId_1, NgramId_2 where all NgramId_2 will be added to NgramId_1
13 - weight: probability of the relation (TODO, fixed to 1 for simple stemming)
16 - recursive queries of postgres
17 - group with: https://en.wikipedia.org/wiki/Nested_set_model
21 {-# LANGUAGE Arrows #-}
22 {-# LANGUAGE FlexibleInstances #-}
23 {-# LANGUAGE FunctionalDependencies #-}
24 {-# LANGUAGE MultiParamTypeClasses #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE OverloadedStrings #-}
27 {-# LANGUAGE QuasiQuotes #-}
28 {-# LANGUAGE RankNTypes #-}
29 {-# LANGUAGE TemplateHaskell #-}
30 {-# OPTIONS_GHC -fno-warn-orphans #-}
32 module Gargantext.Database.Schema.NodeNgramsNgrams
35 import Control.Lens (view)
36 import Control.Lens.TH (makeLensesWith, abbreviatedFields)
37 import Control.Monad (void)
38 import Control.Monad.IO.Class (liftIO)
39 import Data.ByteString (ByteString)
40 import Data.Text (Text)
41 import Data.Maybe (Maybe)
42 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
43 import Database.PostgreSQL.Simple.SqlQQ (sql)
44 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
45 import Debug.Trace (trace)
46 import Gargantext.Database.Utils (Cmd, runOpaQuery, execPGSQuery, connection, formatPGSQuery)
47 import Gargantext.Database.Types.Node (ListId)
48 import Gargantext.Database.Schema.Node (pgNodeId)
49 import Gargantext.Prelude
51 import qualified Database.PostgreSQL.Simple as PGS
53 data NodeNgramsNgramsPoly node_id ngram1_id ngram2_id weight =
54 NodeNgramsNgrams { _nng_NodeId :: node_id
55 , _nng_Ngram1Id :: ngram1_id
56 , _nng_Ngram2Id :: ngram2_id
57 , _nng_Weight :: weight
60 type NodeNgramsNgramsWrite =
61 NodeNgramsNgramsPoly (Column PGInt4 )
64 (Maybe (Column PGFloat8))
66 type NodeNgramsNgramsRead =
67 NodeNgramsNgramsPoly (Column PGInt4 )
72 type NodeNgramsNgrams =
73 NodeNgramsNgramsPoly ListId
78 $(makeAdaptorAndInstance "pNodeNgramsNgrams"
79 ''NodeNgramsNgramsPoly)
80 $(makeLensesWith abbreviatedFields
81 ''NodeNgramsNgramsPoly)
84 nodeNgramsNgramsTable :: Table NodeNgramsNgramsWrite NodeNgramsNgramsRead
85 nodeNgramsNgramsTable =
86 Table "nodes_ngrams_ngrams"
87 ( pNodeNgramsNgrams NodeNgramsNgrams
88 { _nng_NodeId = required "node_id"
89 , _nng_Ngram1Id = required "ngram1_id"
90 , _nng_Ngram2Id = required "ngram2_id"
91 , _nng_Weight = optional "weight"
95 queryNodeNgramsNgramsTable :: Query NodeNgramsNgramsRead
96 queryNodeNgramsNgramsTable = queryTable nodeNgramsNgramsTable
98 -- | Select NodeNgramsNgrams
99 -- TODO not optimized (get all ngrams without filters)
100 nodeNgramsNgrams :: Cmd err [NodeNgramsNgrams]
101 nodeNgramsNgrams = runOpaQuery queryNodeNgramsNgramsTable
103 instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
104 queryRunnerColumnDefault = fieldQueryRunnerColumn
106 instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
107 queryRunnerColumnDefault = fieldQueryRunnerColumn
110 -- TODO: Add option on conflict
111 insertNodeNgramsNgramsNew :: [NodeNgramsNgrams] -> Cmd err Int
112 insertNodeNgramsNgramsNew = insertNodeNgramsNgramsW
113 . map (\(NodeNgramsNgrams n ng1 ng2 maybeWeight) ->
114 NodeNgramsNgrams (pgNodeId n )
117 (pgDouble <$> maybeWeight)
120 insertNodeNgramsNgramsW :: [NodeNgramsNgramsWrite] -> Cmd err Int
121 insertNodeNgramsNgramsW ns = do
123 liftIO $ fromIntegral <$> runInsertMany c nodeNgramsNgramsTable ns
125 ------------------------------------------------------------------------
126 data Action = Del | Add
128 type NgramsParent = Text
129 type NgramsChild = Text
132 ngramsGroup :: Action -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)]
134 ngramsGroup _ _ [] = pure ()
135 ngramsGroup action listId ngs = trace (show ngs) $ runNodeNgramsNgrams q listId ngs
138 Del -> queryDelNodeNgramsNgrams
139 Add -> queryInsertNodeNgramsNgrams
142 runNodeNgramsNgrams :: PGS.Query -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ()
143 runNodeNgramsNgrams q listId ngs = void $ execPGSQuery q (listId, Values fields ngs')
145 ngs' = map (\(ng1,ng2,w) -> (ng1,ng2,maybe 0 identity w)) ngs
146 fields = map (\t -> QualifiedIdentifier Nothing t)
147 ["int4","text","text","float8"]
149 runNodeNgramsNgramsDebug :: PGS.Query -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ByteString
150 runNodeNgramsNgramsDebug q listId ngs = formatPGSQuery q (listId, Values fields ngs')
152 ngs' = map (\(ng1,ng2,w) -> (ng1,ng2,maybe 0 identity w)) ngs
153 fields = map (\t -> QualifiedIdentifier Nothing t)
154 ["int4","text","text","float8"]
157 --------------------------------------------------------------------
158 -- TODO: on conflict update weight
159 queryInsertNodeNgramsNgrams :: PGS.Query
160 queryInsertNodeNgramsNgrams = [sql|
162 WITH input_rows(ng1,ng2,w) AS (?)
163 INSERT INTO nodes_ngrams_ngrams (node_id,ngram1_id,ngram2_id,weight)
164 SELECT nId,ngrams1.id,ngrams2.id,w FROM input_rows
165 JOIN ngrams ngrams1 ON ngrams1.terms = ng1
166 JOIN ngrams ngrams2 ON ngrams2.terms = ng2
167 ON CONFLICT (node_id,ngram1_id,ngram2_id) DO NOTHING -- unique index created here
170 queryDelNodeNgramsNgrams :: PGS.Query
171 queryDelNodeNgramsNgrams = [sql|
173 WITH input(ng1,ng2,w) AS (?)
174 DELETE FROM nodes_ngrams_ngrams AS nnn
175 USING ngrams AS ngrams1,
179 ngrams1.terms = input.ng1
180 AND ngrams2.terms = input.ng2
181 AND nnn.node_id = input.nId
182 AND nnn.ngram1_id = ngrams1.id
183 AND nnn.ngram2_id = ngrams2.id