]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/NodeNgram.hs
add a filter for fis with too few ngrams
[gargantext.git] / src / Gargantext / Database / Schema / NodeNgram.hs
1 {-|
2 Module : Gargantext.Database.Schema.NodeNgrams
3 Description : NodeNgram for Ngram indexation or Lists
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 NodeNgram: relation between a Node and a Ngrams
11
12 if Node is a Document then it is indexing
13 if Node is a List then it is listing (either Stop, Candidate or Map)
14
15 -}
16
17 {-# OPTIONS_GHC -fno-warn-orphans #-}
18
19 {-# LANGUAGE Arrows #-}
20 {-# LANGUAGE FlexibleInstances #-}
21 {-# LANGUAGE FunctionalDependencies #-}
22 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
23 {-# LANGUAGE MultiParamTypeClasses #-}
24 {-# LANGUAGE NoImplicitPrelude #-}
25 {-# LANGUAGE OverloadedStrings #-}
26 {-# LANGUAGE QuasiQuotes #-}
27 {-# LANGUAGE RankNTypes #-}
28 {-# LANGUAGE TemplateHaskell #-}
29
30
31 -- TODO NodeNgrams
32 module Gargantext.Database.Schema.NodeNgram where
33
34 import Data.ByteString (ByteString)
35 import Data.Text (Text)
36 import Control.Lens.TH (makeLenses)
37 import Control.Monad (void)
38 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
39 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
40 import Database.PostgreSQL.Simple.SqlQQ (sql)
41 import Gargantext.Database.Utils (mkCmd, Cmd, execPGSQuery)
42 import Gargantext.Core.Types.Main (ListTypeId)
43 import Gargantext.Database.Types.Node (NodeId, ListId)
44 import Gargantext.Database.Schema.Node (pgNodeId)
45 import Gargantext.Database.Schema.Ngrams (NgramsTypeId, pgNgramsTypeId)
46 import Gargantext.Prelude
47 import Gargantext.Database.Utils (formatPGSQuery)
48 import Opaleye
49 import qualified Database.PostgreSQL.Simple as DPS
50
51 -- | TODO : remove id
52 data NodeNgramPoly node_id ngrams_id parent_id ngrams_type list_type weight
53 = NodeNgram { nng_node_id :: node_id
54 , nng_ngrams_id :: ngrams_id
55 , nng_parent_id :: parent_id
56
57 , nng_ngramsType :: ngrams_type
58 , nng_listType :: list_type
59 , nng_weight :: weight
60 } deriving (Show)
61
62 type NodeNgramWrite =
63 NodeNgramPoly
64 (Column PGInt4 )
65 (Column PGInt4 )
66 (Maybe (Column PGInt4))
67
68 (Column PGInt4 )
69 (Column PGInt4 )
70 (Column PGFloat8)
71
72 type NodeNgramRead =
73 NodeNgramPoly
74 (Column PGInt4 )
75 (Column PGInt4 )
76 (Column PGInt4 )
77
78 (Column PGInt4 )
79 (Column PGInt4 )
80 (Column PGFloat8)
81
82 type NodeNgramReadNull =
83 NodeNgramPoly
84 (Column (Nullable PGInt4 ))
85 (Column (Nullable PGInt4 ))
86 (Column (Nullable PGInt4 ))
87
88 (Column (Nullable PGInt4 ))
89 (Column (Nullable PGInt4 ))
90 (Column (Nullable PGFloat8))
91
92 type NodeNgram =
93 NodeNgramPoly NodeId Int (Maybe NgramsParentId) NgramsTypeId Int Double
94
95 newtype NgramsParentId = NgramsParentId Int
96 deriving (Show, Eq, Num)
97
98 pgNgramsParentId :: NgramsParentId -> Column PGInt4
99 pgNgramsParentId (NgramsParentId n) = pgInt4 n
100
101 $(makeAdaptorAndInstance "pNodeNgram" ''NodeNgramPoly)
102 makeLenses ''NodeNgramPoly
103
104 nodeNgramTable :: Table NodeNgramWrite NodeNgramRead
105 nodeNgramTable = Table "nodes_ngrams"
106 ( pNodeNgram NodeNgram
107 { nng_node_id = required "node_id"
108 , nng_ngrams_id = required "ngrams_id"
109 , nng_parent_id = optional "parent_id"
110 , nng_ngramsType = required "ngrams_type"
111 , nng_listType = required "list_type"
112 , nng_weight = required "weight"
113 }
114 )
115
116 queryNodeNgramTable :: Query NodeNgramRead
117 queryNodeNgramTable = queryTable nodeNgramTable
118
119 insertNodeNgrams :: [NodeNgram] -> Cmd err Int
120 insertNodeNgrams = insertNodeNgramW
121 . map (\(NodeNgram n g p ngt lt w) ->
122 NodeNgram (pgNodeId n)
123 (pgInt4 g)
124 (pgNgramsParentId <$> p)
125 (pgNgramsTypeId ngt)
126 (pgInt4 lt)
127 (pgDouble w)
128 )
129
130 insertNodeNgramW :: [NodeNgramWrite] -> Cmd err Int
131 insertNodeNgramW nns =
132 mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing
133 where
134 insertNothing = (Insert { iTable = nodeNgramTable
135 , iRows = nns
136 , iReturning = rCount
137 , iOnConflict = (Just DoNothing)
138 })
139
140 type NgramsText = Text
141
142 updateNodeNgrams' :: ListId -> [(NgramsTypeId, NgramsText, ListTypeId)] -> Cmd err ()
143 updateNodeNgrams' _ [] = pure ()
144 updateNodeNgrams' listId input = void $ execPGSQuery updateQuery (DPS.Only $ Values fields input')
145 where
146 fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4"]
147 input' = map (\(nt,t,lt) -> (listId, nt, t, lt)) input
148
149 updateNodeNgrams'_debug :: ListId -> [(NgramsTypeId, NgramsText, ListTypeId)] -> Cmd err ByteString
150 updateNodeNgrams'_debug listId input = formatPGSQuery updateQuery (DPS.Only $ Values fields input')
151 where
152 fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4"]
153 input' = map (\(nt,t,lt) -> (listId, nt, t, lt)) input
154
155 updateQuery :: DPS.Query
156 updateQuery = [sql|
157 WITH new(node_id,ngrams_type,terms,typeList) as (?)
158
159 INSERT into nodes_ngrams (node_id,ngrams_id,ngrams_type,list_type,weight)
160
161 SELECT node_id,ngrams.id,ngrams_type,typeList,1 FROM new
162 JOIN ngrams ON ngrams.terms = new.terms
163 ON CONFLICT (node_id, ngrams_id, ngrams_type) DO
164 -- DO NOTHING
165
166 UPDATE SET list_type = excluded.list_type
167 ;
168 |]
169
170 data Action = Del | Add
171 type NgramsParent = Text
172 type NgramsChild = Text
173
174 {-
175 ngramsGroup :: Action -> ListId -> [(NgramsTypeId, NgramsParent, NgramsChild)] -> Cmd err ()
176 ngramsGroup _ _ [] = pure ()
177 ngramsGroup a lid input = void $ trace (show input) $ execPGSQuery (ngramsGroupQuery a) (DPS.Only $ Values fields input')
178 where
179 fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4","text","text"]
180 input' = map (\(ntpid,p,c) -> (lid, nodeTypeId NodeList, userMaster, ntpid, p,c)) input
181 -}
182
183 ngramsGroupQuery :: Action -> DPS.Query
184 ngramsGroupQuery a = case a of
185 Add -> [sql|
186 WITH input(lid,listTypeId,masterUsername,ntype,parent_terms,child_terms)
187 AS (?),
188 -- (VALUES (15::"int4",5::"int4",'gargantua'::"text",4::"int4",'input'::"text",'designer'::"text"))),
189
190 list_master AS (
191 SELECT n.id from nodes n
192 JOIN auth_user u ON n.user_id = u.id
193 JOIN input ON n.typename = input.listTypeId
194 WHERE u.username = input.masterUsername
195 LIMIT 1
196 ),
197
198 list_user AS(
199 -- FIRST import parent from master to user list
200 INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
201 SELECT input.lid,nn.ngrams_id,nn.parent_id,nn.ngrams_type,nn.list_type, nn.weight
202 FROM INPUT
203 JOIN ngrams ng ON ng.terms = input.parent_terms
204 JOIN nodes_ngrams nn ON nn.ngrams_id = ng.id
205 JOIN list_master ON nn.node_id = list_master.id
206 WHERE
207 nn.ngrams_id = ng.id
208 ON CONFLICT (node_id,ngrams_id,ngrams_type) DO NOTHING
209 )
210
211 INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
212
213 SELECT input.lid, nc.id, nnpu.id, input.ntype, nnmaster.list_type, nnmaster.weight
214 FROM input
215
216 JOIN ngrams np ON np.terms = input.parent_terms
217 JOIN ngrams nc ON nc.terms = input.child_terms
218
219 JOIN nodes_ngrams nnpu ON nnpu.ngrams_id = np.id
220 JOIN nodes_ngrams nnmaster ON nnmaster.ngrams_id = nc.id
221 JOIN list_master ON nnmaster.node_id = list_master.id
222
223 WHERE
224 nnpu.node_id = input.lid
225 AND nnpu.ngrams_type = input.ntype
226
227 AND nnmaster.ngrams_id = nc.id
228 AND nnmaster.ngrams_type = ntype
229
230 ON CONFLICT (node_id,ngrams_id,ngrams_type) DO
231 UPDATE SET parent_id = excluded.parent_id
232
233
234 |]
235 Del -> [sql|
236 WITH input(lid,listTypeId,masterUsername,ntype,parent_terms,child_terms)
237 AS (?),
238 -- (VALUES (15::"int4",5::"int4",'gargantua'::"text",4::"int4",'input'::"text",'designer'::"text"))),
239
240 list_master AS (
241 SELECT n.id from nodes n
242 JOIN auth_user u ON n.user_id = u.id
243 JOIN input ON n.typename = input.listTypeId
244 WHERE u.username = input.masterUsername
245 LIMIT 1
246 ),
247
248 list_user AS(
249 -- FIRST import parent from master to user list
250 INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
251 SELECT input.lid,nn.ngrams_id,nn.parent_id,nn.ngrams_type,nn.list_type, nn.weight
252 FROM INPUT
253 JOIN ngrams ng ON ng.terms = input.parent_terms
254 JOIN nodes_ngrams nn ON nn.ngrams_id = ng.id
255 JOIN list_master ON nn.node_id = list_master.id
256 WHERE
257 nn.ngrams_id = ng.id
258 ON CONFLICT (node_id,ngrams_id,ngrams_type) DO NOTHING
259 )
260
261 INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
262
263 SELECT input.lid, nc.id, NULL, input.ntype, nnmaster.list_type, nnmaster.weight
264 FROM input
265
266 JOIN ngrams np ON np.terms = input.parent_terms
267 JOIN ngrams nc ON nc.terms = input.child_terms
268
269 JOIN nodes_ngrams nnmaster ON nnmaster.ngrams_id = nc.id
270 JOIN list_master ON nnmaster.node_id = list_master.id
271
272 WHERE
273 nnmaster.ngrams_id = nc.id
274 AND nnmaster.ngrams_type = ntype
275
276 ON CONFLICT (node_id,ngrams_id,ngrams_type) DO
277 UPDATE SET parent_id = NULL
278
279 |]
280
281
282 data NodeNgramsUpdate = NodeNgramsUpdate
283 { _nnu_user_list_id :: ListId
284 , _nnu_lists_update :: [(NgramsTypeId, NgramsText, ListTypeId)]
285 , _nnu_add_children :: [(NgramsTypeId, NgramsParent, NgramsChild)]
286 , _nnu_rem_children :: [(NgramsTypeId, NgramsParent, NgramsChild)]
287 }
288
289 -- TODO wrap these updates in a transaction.
290 -- TODO-ACCESS:
291 -- * check userId CanUpdateNgrams userListId
292
293 {-
294 updateNodeNgrams :: NodeNgramsUpdate -> Cmd err ()
295 updateNodeNgrams nnu = do
296 updateNodeNgrams' userListId $ _nnu_lists_update nnu
297 ngramsGroup Del userListId $ _nnu_rem_children nnu
298 ngramsGroup Add userListId $ _nnu_add_children nnu
299 -- TODO remove duplicate line (fix SQL query)
300 ngramsGroup Add userListId $ _nnu_add_children nnu
301 where
302 userListId = _nnu_user_list_id nnu
303 -}