]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/NodeNgramsNgrams.hs
[NGRAMS] Extract the listId parameter
[gargantext.git] / src / Gargantext / Database / Schema / NodeNgramsNgrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.NodeNgramsNgrams
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 NodeNgramsNgrams table is used to group Ngrams
11 - NodeId :: List Id
12 - NgramId_1, NgramId_2 where all NgramId_2 will be added to NgramId_1
13 - weight: probability of the relation (TODO, fixed to 1 for simple stemming)
14
15 Next Step benchmark:
16 - recursive queries of postgres
17 - group with: https://en.wikipedia.org/wiki/Nested_set_model
18
19 -}
20
21 {-# LANGUAGE Arrows #-}
22 {-# LANGUAGE FlexibleInstances #-}
23 {-# LANGUAGE FunctionalDependencies #-}
24 {-# LANGUAGE MultiParamTypeClasses #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE OverloadedStrings #-}
27 {-# LANGUAGE QuasiQuotes #-}
28 {-# LANGUAGE RankNTypes #-}
29 {-# LANGUAGE TemplateHaskell #-}
30 {-# OPTIONS_GHC -fno-warn-orphans #-}
31
32 module Gargantext.Database.Schema.NodeNgramsNgrams
33 where
34
35 import Control.Lens (view)
36 import Control.Lens.TH (makeLensesWith, abbreviatedFields)
37 import Control.Monad (void)
38 import Control.Monad.IO.Class (liftIO)
39 import Data.ByteString (ByteString)
40 import Data.Text (Text)
41 import Data.Maybe (Maybe)
42 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
43 import Database.PostgreSQL.Simple.SqlQQ (sql)
44 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
45 import Debug.Trace (trace)
46 import Gargantext.Database.Utils (Cmd, runOpaQuery, execPGSQuery, connection, formatPGSQuery)
47 import Gargantext.Database.Types.Node (ListId)
48 import Gargantext.Database.Schema.Node (pgNodeId)
49 import Gargantext.Prelude
50 import Opaleye
51 import qualified Database.PostgreSQL.Simple as PGS
52
53 data NodeNgramsNgramsPoly node_id ngram1_id ngram2_id weight =
54 NodeNgramsNgrams { _nng_NodeId :: node_id
55 , _nng_Ngram1Id :: ngram1_id
56 , _nng_Ngram2Id :: ngram2_id
57 , _nng_Weight :: weight
58 } deriving (Show)
59
60 type NodeNgramsNgramsWrite =
61 NodeNgramsNgramsPoly (Column PGInt4 )
62 (Column PGInt4 )
63 (Column PGInt4 )
64 (Maybe (Column PGFloat8))
65
66 type NodeNgramsNgramsRead =
67 NodeNgramsNgramsPoly (Column PGInt4 )
68 (Column PGInt4 )
69 (Column PGInt4 )
70 (Column PGFloat8)
71
72 type NodeNgramsNgrams =
73 NodeNgramsNgramsPoly ListId
74 Int
75 Int
76 (Maybe Double)
77
78 $(makeAdaptorAndInstance "pNodeNgramsNgrams"
79 ''NodeNgramsNgramsPoly)
80 $(makeLensesWith abbreviatedFields
81 ''NodeNgramsNgramsPoly)
82
83
84 nodeNgramsNgramsTable :: Table NodeNgramsNgramsWrite NodeNgramsNgramsRead
85 nodeNgramsNgramsTable =
86 Table "nodes_ngrams_ngrams"
87 ( pNodeNgramsNgrams NodeNgramsNgrams
88 { _nng_NodeId = required "node_id"
89 , _nng_Ngram1Id = required "ngram1_id"
90 , _nng_Ngram2Id = required "ngram2_id"
91 , _nng_Weight = optional "weight"
92 }
93 )
94
95 queryNodeNgramsNgramsTable :: Query NodeNgramsNgramsRead
96 queryNodeNgramsNgramsTable = queryTable nodeNgramsNgramsTable
97
98 -- | Select NodeNgramsNgrams
99 -- TODO not optimized (get all ngrams without filters)
100 nodeNgramsNgrams :: Cmd err [NodeNgramsNgrams]
101 nodeNgramsNgrams = runOpaQuery queryNodeNgramsNgramsTable
102
103 instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
104 queryRunnerColumnDefault = fieldQueryRunnerColumn
105
106 instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
107 queryRunnerColumnDefault = fieldQueryRunnerColumn
108
109
110 -- TODO: Add option on conflict
111 insertNodeNgramsNgramsNew :: [NodeNgramsNgrams] -> Cmd err Int
112 insertNodeNgramsNgramsNew = insertNodeNgramsNgramsW
113 . map (\(NodeNgramsNgrams n ng1 ng2 maybeWeight) ->
114 NodeNgramsNgrams (pgNodeId n )
115 (pgInt4 ng1)
116 (pgInt4 ng2)
117 (pgDouble <$> maybeWeight)
118 )
119
120 insertNodeNgramsNgramsW :: [NodeNgramsNgramsWrite] -> Cmd err Int
121 insertNodeNgramsNgramsW ns = do
122 c <- view connection
123 liftIO $ fromIntegral <$> runInsertMany c nodeNgramsNgramsTable ns
124
125 ------------------------------------------------------------------------
126 data Action = Del | Add
127
128 type NgramsParent = Text
129 type NgramsChild = Text
130
131
132 ngramsGroup :: Action -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)]
133 -> Cmd err ()
134 ngramsGroup _ _ [] = pure ()
135 ngramsGroup action listId ngs = trace (show ngs) $ runNodeNgramsNgrams q listId ngs
136 where
137 q = case action of
138 Del -> queryDelNodeNgramsNgrams
139 Add -> queryInsertNodeNgramsNgrams
140
141
142 runNodeNgramsNgrams :: PGS.Query -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ()
143 runNodeNgramsNgrams q listId ngs = void $ execPGSQuery q (listId, Values fields ngs')
144 where
145 ngs' = map (\(ng1,ng2,w) -> (ng1,ng2,maybe 0 identity w)) ngs
146 fields = map (\t -> QualifiedIdentifier Nothing t)
147 ["int4","text","text","float8"]
148
149 runNodeNgramsNgramsDebug :: PGS.Query -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ByteString
150 runNodeNgramsNgramsDebug q listId ngs = formatPGSQuery q (listId, Values fields ngs')
151 where
152 ngs' = map (\(ng1,ng2,w) -> (ng1,ng2,maybe 0 identity w)) ngs
153 fields = map (\t -> QualifiedIdentifier Nothing t)
154 ["int4","text","text","float8"]
155
156
157 --------------------------------------------------------------------
158 -- TODO: on conflict update weight
159 queryInsertNodeNgramsNgrams :: PGS.Query
160 queryInsertNodeNgramsNgrams = [sql|
161 WITH nId AS ?
162 WITH input_rows(ng1,ng2,w) AS (?)
163 INSERT INTO nodes_ngrams_ngrams (node_id,ngram1_id,ngram2_id,weight)
164 SELECT nId,ngrams1.id,ngrams2.id,w FROM input_rows
165 JOIN ngrams ngrams1 ON ngrams1.terms = ng1
166 JOIN ngrams ngrams2 ON ngrams2.terms = ng2
167 ON CONFLICT (node_id,ngram1_id,ngram2_id) DO NOTHING -- unique index created here
168 |]
169
170 queryDelNodeNgramsNgrams :: PGS.Query
171 queryDelNodeNgramsNgrams = [sql|
172 WITH nId AS ?
173 WITH input(ng1,ng2,w) AS (?)
174 DELETE FROM nodes_ngrams_ngrams AS nnn
175 USING ngrams AS ngrams1,
176 ngrams AS ngrams2,
177 input AS input
178 WHERE
179 ngrams1.terms = input.ng1
180 AND ngrams2.terms = input.ng2
181 AND nnn.node_id = input.nId
182 AND nnn.ngram1_id = ngrams1.id
183 AND nnn.ngram2_id = ngrams2.id
184 ;
185 |]
186