]> Git — Sourcephile - gargantext.git/blob - devops/postgres/schema.sql
[FIX] update schema
[gargantext.git] / devops / postgres / schema.sql
1 CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
2 COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
3
4 CREATE EXTENSION IF NOT EXISTS tsm_system_rows;
5 CREATE EXTENSION pgcrypto;
6
7 -----------------------------------------------------------------
8 CREATE TABLE public.auth_user (
9 id SERIAL,
10 password CHARACTER varying(128) NOT NULL,
11 last_login TIMESTAMP with time zone,
12 is_superuser BOOLEAN NOT NULL,
13 username CHARACTER varying(150) NOT NULL,
14 first_name CHARACTER varying(30) NOT NULL,
15 last_name CHARACTER varying(30) NOT NULL,
16 email CHARACTER varying(254) NOT NULL,
17 is_staff BOOLEAN NOT NULL,
18 is_active BOOLEAN NOT NULL,
19 date_joined TIMESTAMP with time zone DEFAULT now() NOT NULL,
20 forgot_password_uuid TEXT,
21 PRIMARY KEY (id)
22 );
23 ALTER TABLE public.auth_user OWNER TO gargantua;
24 -----------------------------------------------------------------
25
26 -- TODO add publication_date
27 -- TODO typename -> type_id
28 CREATE TABLE public.nodes (
29 id SERIAL,
30 hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
31 typename INTEGER NOT NULL,
32 user_id INTEGER NOT NULL,
33 parent_id INTEGER REFERENCES public.nodes(id) ON DELETE CASCADE ,
34 name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
35 date TIMESTAMP with time zone DEFAULT now() NOT NULL,
36 hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
37 search tsvector,
38 PRIMARY KEY (id),
39 FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
40 );
41 ALTER TABLE public.nodes OWNER TO gargantua;
42 --------------------------------------------------------------
43
44 -- TODO add publication_date
45 -- TODO typename -> type_id
46 CREATE TABLE public.contexts (
47 id SERIAL,
48 hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
49 typename INTEGER NOT NULL,
50 user_id INTEGER NOT NULL,
51 parent_id INTEGER REFERENCES public.contexts(id) ON DELETE CASCADE ,
52 name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
53 date TIMESTAMP with time zone DEFAULT now() NOT NULL,
54 hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
55 search tsvector,
56 PRIMARY KEY (id),
57 FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
58 );
59 ALTER TABLE public.contexts OWNER TO gargantua;
60
61 --------------------------------------------------------------
62 -- | Ngrams
63 CREATE TABLE public.ngrams (
64 id SERIAL,
65 terms CHARACTER varying(255),
66 n INTEGER,
67 PRIMARY KEY (id)
68 );
69 ALTER TABLE public.ngrams OWNER TO gargantua;
70
71 -- | Ngrams PosTag
72 CREATE TABLE public.ngrams_postag (
73 id SERIAL ,
74 lang_id INTEGER ,
75 algo_id INTEGER ,
76 postag CHARACTER varying(5) ,
77 ngrams_id INTEGER NOT NULL ,
78 lemm_id INTEGER NOT NULL ,
79 score INTEGER DEFAULT 1 ::integer NOT NULL ,
80 FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
81 FOREIGN KEY (lemm_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
82 );
83 ALTER TABLE public.ngrams_postag OWNER TO gargantua;
84
85 --------------------------------------------------------------
86 -- Node here should have type NodeList
87 CREATE TABLE public.node_ngrams (
88 id SERIAL ,
89 node_id INTEGER NOT NULL ,
90 node_subtype INTEGER ,
91 ngrams_id INTEGER NOT NULL ,
92 ngrams_type INTEGER , -- change to ngrams_field? (no for pedagogic reason)
93 ngrams_field INTEGER ,
94 ngrams_tag INTEGER ,
95 ngrams_class INTEGER ,
96 weight double precision ,
97 PRIMARY KEY (id) ,
98 FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE ,
99 FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
100 );
101 ALTER TABLE public.node_ngrams OWNER TO gargantua;
102
103 --CREATE TABLE public.context_nodengrams_nodengrams (
104 -- context_id INTEGER NOT NULL ,
105 -- node_ngrams1_id INTEGER NOT NULL ,
106 -- node_ngrams2_id INTEGER NOT NULL ,
107 -- weight double precision ,
108 -- FOREIGN KEY (node_id) REFERENCES public.contexts(id) ON DELETE CASCADE ,
109 -- FOREIGN KEY (node_ngrams1_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE,
110 -- FOREIGN KEY (node_ngrams2_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE,
111 -- PRIMARY KEY (node_id, node_ngrams1_id, node_ngrams2_id)
112 --);
113 --ALTER TABLE public.context_nodengrams_nodengrams OWNER TO gargantua;
114
115 --------------------------------------------------------------
116 --------------------------------------------------------------
117 --
118 --CREATE TABLE public.nodes_ngrams_ngrams (
119 -- node_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
120 -- ngram1_id integer NOT NULL REFERENCES public.ngrams(id) ON DELETE CASCADE,
121 -- ngram2_id integer NOT NULL REFERENCES public.ngrams(id) ON DELETE CASCADE,
122 -- weight double precision,
123 -- PRIMARY KEY (node_id,ngram1_id,ngram2_id)
124 --);
125 --
126 --ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua;
127 ---------------------------------------------------------------
128 -- TODO nodes_nodes(node1_id int, node2_id int, edge_type int , weight real)
129 CREATE TABLE public.nodes_nodes (
130 node1_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
131 node2_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
132 score REAL ,
133 category INTEGER ,
134 PRIMARY KEY (node1_id, node2_id)
135 );
136 ALTER TABLE public.nodes_nodes OWNER TO gargantua;
137
138
139
140 -- To attach contexts to a Corpus
141 CREATE TABLE public.nodes_contexts (
142 id SERIAL ,
143 node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
144 context_id INTEGER NOT NULL REFERENCES public.contexts(id) ON DELETE CASCADE,
145 score REAL ,
146 category INTEGER ,
147 PRIMARY KEY (id)
148 );
149 ALTER TABLE public.nodes_contexts OWNER TO gargantua;
150
151 CREATE TABLE public.nodescontexts_nodescontexts (
152 nodescontexts1 INTEGER NOT NULL REFERENCES public.nodes_contexts(id) ON DELETE CASCADE,
153 nodescontexts2 INTEGER NOT NULL REFERENCES public.nodes_contexts(id) ON DELETE CASCADE,
154
155 PRIMARY KEY (nodescontexts1, nodescontexts2)
156 );
157 ALTER TABLE public.nodescontexts_nodescontexts OWNER TO gargantua;
158
159
160 ---------------------------------------------------------------
161 CREATE TABLE public.context_node_ngrams (
162 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
163 node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
164 ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
165 ngrams_type INTEGER ,
166 weight double precision,
167 doc_count INTEGER DEFAULT 0,
168 PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
169 );
170 ALTER TABLE public.context_node_ngrams OWNER TO gargantua;
171
172 CREATE TABLE public.context_node_ngrams2 (
173 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
174 nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
175 weight double precision,
176 PRIMARY KEY (context_id, nodengrams_id)
177 );
178 ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua;
179
180
181 --------------------------------------------------------------------
182 CREATE TABLE public.node_node_ngrams (
183 node1_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
184 node2_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
185 ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
186 ngrams_type INTEGER,
187 weight double precision,
188 PRIMARY KEY (node1_id, node2_id, ngrams_id, ngrams_type)
189 );
190 ALTER TABLE public.node_node_ngrams OWNER TO gargantua;
191
192 CREATE TABLE public.node_node_ngrams2 (
193 node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
194 nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
195 weight double precision,
196 PRIMARY KEY (node_id, nodengrams_id)
197 );
198 ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua;
199
200
201 --------------------------------------------------------------
202
203 --CREATE TABLE public.nodes_ngrams_repo (
204 -- version integer NOT NULL,
205 -- patches jsonb DEFAULT '{}'::jsonb NOT NULL,
206 -- PRIMARY KEY (version)
207 --);
208 --ALTER TABLE public.nodes_ngrams_repo OWNER TO gargantua;
209
210 ---------------------------------------------------------
211
212 -- If needed for rights management at row level
213 -- CREATE EXTENSION IF NOT EXISTS acl WITH SCHEMA public;
214 CREATE TABLE public.rights (
215 user_id INTEGER NOT NULL REFERENCES public.auth_user(id) ON DELETE CASCADE,
216 node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
217 rights INTEGER NOT NULL,
218 PRIMARY KEY (user_id, node_id)
219 );
220 ALTER TABLE public.rights OWNER TO gargantua;
221
222 ------------------------------------------------------------
223 -- Node Story
224
225 CREATE TABLE public.node_stories (
226 id SERIAL,
227 node_id INTEGER NOT NULL,
228 version INTEGER NOT NULL,
229 ngrams_type_id INTEGER NOT NULL,
230 ngrams_id INTEGER NOT NULL,
231 --children TEXT[],
232 ngrams_repo_element jsonb DEFAULT '{}'::jsonb NOT NULL,
233 PRIMARY KEY (id),
234 FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
235 FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
236 );
237 ALTER TABLE public.node_stories OWNER TO gargantua;
238
239 CREATE UNIQUE INDEX ON public.node_stories USING btree (node_id, ngrams_type_id, ngrams_id);
240
241
242 create table public.node_story_archive_history (
243 id SERIAL,
244 node_id INTEGER NOT NULL,
245 ngrams_type_id INTEGER NOT NULL,
246 ngrams_id INTEGER NOT NULL,
247 patch jsonb DEFAULT '{}'::jsonb NOT NULL,
248 version INTEGER NOT NULL DEFAULT 0,
249 PRIMARY KEY (id),
250 FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
251 FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
252 );
253 ALTER TABLE public.node_story_archive_history OWNER TO gargantua;
254
255
256 ------------------------------------------------------------
257 -- INDEXES
258 CREATE INDEX ON public.auth_user USING btree (username varchar_pattern_ops);
259 CREATE UNIQUE INDEX ON public.auth_user USING btree (username);
260
261 CREATE INDEX ON public.rights USING btree (user_id,node_id);
262
263 CREATE INDEX ON public.nodes USING gin (hyperdata);
264 CREATE INDEX ON public.nodes USING btree (user_id, typename, parent_id);
265 CREATE INDEX ON public.nodes USING btree (id, typename, date ASC);
266 CREATE INDEX ON public.nodes USING btree (id, typename, date DESC);
267 CREATE INDEX ON public.nodes USING btree (typename, id);
268 CREATE UNIQUE INDEX ON public.nodes USING btree (hash_id);
269
270 CREATE INDEX ON public.contexts USING gin (hyperdata);
271 CREATE INDEX ON public.contexts USING btree (user_id, typename, parent_id);
272 CREATE INDEX ON public.contexts USING btree (id, typename, date ASC);
273 CREATE INDEX ON public.contexts USING btree (id, typename, date DESC);
274 CREATE INDEX ON public.contexts USING btree (typename, id);
275 CREATE UNIQUE INDEX ON public.contexts USING btree (hash_id);
276
277 CREATE INDEX ON public.nodescontexts_nodescontexts USING btree (nodescontexts1, nodescontexts2);
278 -- CREATE UNIQUE INDEX ON public.nodes USING btree (((hyperdata ->> 'uniqId'::text)));
279 -- CREATE UNIQUE INDEX ON public.nodes USING btree (((hyperdata ->> 'uniqIdBdd'::text)));
280 -- CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdata ->> 'uniqId'::text)));
281
282 CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN
283 CREATE INDEX ON public.ngrams USING btree (id, terms);
284 CREATE UNIQUE INDEX ON public.ngrams_postag (lang_id,algo_id,postag,ngrams_id,lemm_id);
285
286 -- To save the Node Ngrams Repo
287 CREATE INDEX ON public.node_ngrams USING btree (node_id,node_subtype);
288 CREATE UNIQUE INDEX ON public.node_ngrams USING btree (node_id,node_subtype, ngrams_id);
289
290
291
292 -- To make the links between Nodes in Tree/Forest
293 CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id);
294 CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, category);
295
296
297 -- To make the links between Corpus Node and its contexts
298 CREATE UNIQUE INDEX ON public.nodes_contexts USING btree (node_id, context_id);
299 CREATE INDEX ON public.nodes_contexts USING btree (node_id, context_id, category);
300
301
302 ------------------------------------------------------------------------
303 CREATE UNIQUE INDEX ON public.context_node_ngrams USING btree (context_id, node_id, ngrams_id, ngrams_type);
304 CREATE INDEX ON public.context_node_ngrams USING btree (context_id, node_id);
305 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_id, node_id);
306 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_type);
307
308 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id);
309 CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id);
310 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id);
311
312
313 CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
314 CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id);
315 CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_id, node2_id);
316 CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_type);
317 CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id);
318 CREATE INDEX ON public.node_node_ngrams2 USING btree (nodengrams_id);
319 CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id, nodengrams_id);
320
321 -- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (context_id, node_ngrams1_id, node_ngrams2_id);
322 -- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (node_ngrams1_id);
323 -- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (node_ngrams2_id);
324
325
326 ------------------------------------------------------------------------
327 -- Ngrams Full DB Extraction Optim
328 -- TODO remove hard parameter and move elsewhere
329 CREATE OR REPLACE function node_pos(int, int) returns bigint
330 AS 'SELECT count(id) from nodes
331 WHERE id < $1
332 AND typename = $2
333 '
334 LANGUAGE SQL immutable;
335
336 --drop index node_by_pos;
337 --create index node_by_pos on nodes using btree(node_pos(id,typename));
338
339 -- Optimization for Ngrams Table View
340 -- CREATE MATERIALIZED VIEW IF NOT EXISTS context_node_ngrams_view AS
341 -- SELECT DISTINCT context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
342 -- FROM nodes_contexts
343 -- JOIN context_node_ngrams
344 -- ON context_node_ngrams.context_id = nodes_contexts.context_id;
345
346 -- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_context_id_idx
347 -- ON context_node_ngrams_view(context_id);
348 -- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_ngrams_id_idx
349 -- ON context_node_ngrams_view(ngrams_id);
350 -- CREATE INDEX IF NOT EXISTS context_node_ngrams_view_node_id_idx
351 -- ON context_node_ngrams_view(node_id);
352 -- CREATE UNIQUE INDEX IF NOT EXISTS context_node_ngrams_view_context_ngrams_node_uniq_idx
353 -- ON context_node_ngrams_view (context_id, ngrams_id, node_id);
354
355 CREATE INDEX IF NOT EXISTS context_node_ngrams_context_id_ngrams_id_idx
356 ON context_node_ngrams(context_id, ngrams_id);
357
358 CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx
359 ON node_stories(ngrams_id);
360 ----
361
362
363
364 -- Version 0.0.6.9.9.6.4.sql
365 -- ADD triggers
366 CREATE OR REPLACE FUNCTION check_node_stories_json()
367 RETURNS TRIGGER AS $$
368 DECLARE
369 missing_ngrams_exist boolean;
370 BEGIN
371 WITH child_ngrams as
372 (SELECT jsonb_array_elements_text(NEW.ngrams_repo_element->'children') AS term),
373 parent_ngrams AS
374 (SELECT NEW.ngrams_repo_element->>'root' AS term),
375
376 ngrams_child_parent AS
377 (SELECT child_ngrams.term, ngrams.terms
378 FROM child_ngrams
379 LEFT JOIN ngrams ON child_ngrams.term = ngrams.terms
380 WHERE ngrams.terms IS NULL
381
382 UNION
383
384 SELECT parent_ngrams.term, ngrams.terms
385 FROM parent_ngrams
386 LEFT JOIN ngrams ON parent_ngrams.term = ngrams.terms
387 WHERE ngrams.terms IS NULL
388 AND parent_ngrams.term IS NOT NULL)
389
390 SELECT EXISTS(SELECT * FROM ngrams_child_parent) INTO missing_ngrams_exist;
391
392 IF missing_ngrams_exist THEN
393 RAISE EXCEPTION 'node_stories: ngrams are missing: %', row_to_json(NEW);
394 END IF;
395
396 RETURN NEW;
397 END;
398 $$ LANGUAGE plpgsql;
399
400 CREATE OR REPLACE TRIGGER check_node_stories_json_trg
401 AFTER INSERT OR UPDATE
402 ON node_stories
403 FOR EACH ROW
404 EXECUTE PROCEDURE check_node_stories_json();
405
406
407 CREATE OR REPLACE FUNCTION check_ngrams_json()
408 RETURNS TRIGGER AS $$
409 DECLARE
410 missing_ngrams_exist boolean;
411 BEGIN
412 WITH child_ngrams as
413 (SELECT jsonb_array_elements_text(ngrams_repo_element->'children') AS term
414 FROM node_stories
415 WHERE term = OLD.terms),
416 parent_ngrams AS
417 (SELECT ngrams_repo_element->>'root' AS term
418 FROM node_stories
419 WHERE term = OLD.terms),
420 child_parent_ngrams AS
421 (SELECT * FROM child_ngrams
422 UNION SELECT * FROM parent_ngrams)
423
424 SELECT EXISTS(SELECT * FROM child_parent_ngrams) INTO missing_ngrams_exist;
425
426 IF missing_ngrams_exist THEN
427 RAISE EXCEPTION 'ngrams are missing: %', row_to_json(OLD);
428 END IF;
429
430 RETURN OLD;
431 END;
432 $$ LANGUAGE plpgsql;
433
434 CREATE OR REPLACE TRIGGER check_ngrams_json_trg
435 AFTER DELETE
436 ON ngrams
437 FOR EACH ROW
438 EXECUTE PROCEDURE check_ngrams_json();
439
440
441
442
443