]> Git — Sourcephile - gargantext.git/blob - bin/gargantext-upgrade/Main.hs
Merge branch 'dev' into 104-dev-john-snow-nlp
[gargantext.git] / bin / gargantext-upgrade / Main.hs
1 {-|
2 Module : Main.hs
3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Import a corpus binary.
11
12 -}
13
14 {-# LANGUAGE Strict #-}
15 {-# LANGUAGE QuasiQuotes #-}
16
17 module Main where
18
19
20 import Data.Either (Either(..))
21 import Database.PostgreSQL.Simple.SqlQQ (sql)
22 import GHC.IO.Exception (IOException)
23 import Gargantext.API.Admin.EnvTypes (DevEnv)
24 import Gargantext.API.Dev (withDevEnv, runCmdDev)
25 import Gargantext.API.Node () -- instances only
26 import Gargantext.API.Prelude (GargError)
27 import Gargantext.Core (HasDBid(toDBid))
28 import Gargantext.Core.Types.Individu (User(..))
29 import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
30 import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
31 import Gargantext.Database.Admin.Trigger.Init
32 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
33 import Gargantext.Database.Admin.Types.Node (NodeType(NodeDocument, NodeContact))
34 import Gargantext.Database.Prelude (Cmd'', Cmd, execPGSQuery)
35 import Gargantext.Database.Query.Table.Node (getOrMkList)
36 import Gargantext.Prelude
37 import Gargantext.Prelude.Config (GargConfig(..), readConfig)
38 import Prelude (getLine)
39 import System.Environment (getArgs)
40 import qualified Data.List as List (cycle, concat, take, unlines)
41
42 main :: IO ()
43 main = do
44
45 let ___ = putStrLn $ List.concat $ List.take 72 $ List.cycle ["_"]
46
47 ___
48 putStrLn "GarganText upgrade to version 0.0.5"
49 ___
50
51 params@[iniPath] <- getArgs
52 _ <- if length params /= 1
53 then panic "Usage: ./gargantext-upgrade gargantext.ini"
54 else pure ()
55
56 putStrLn $ List.unlines
57 [ "Your Database defined in gargantext.ini will be upgraded."
58 , "We stronlgy recommend you to make a backup using pg_dump."
59 , ""
60 , "If you encounter issues, please report your bugs here:"
61 , "https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/101"
62 , ""
63 , "Press ENTER if you want to continue, CTRL+C if you want to stop."
64 ]
65
66 _ok <- getLine
67
68 cfg <- readConfig iniPath
69 let secret = _gc_secretkey cfg
70
71 let
72 contextsTriggers :: Cmd GargError ()
73 contextsTriggers = do
74 (masterUserId, _masterRootId, masterCorpusId)
75 <- getOrMk_RootWithCorpus (UserName userMaster)
76 (Left corpusMasterName)
77 (Nothing :: Maybe HyperdataCorpus)
78 masterListId <- getOrMkList masterCorpusId masterUserId
79 _triggers <- initLastTriggers masterListId
80 pure ()
81
82
83 withDevEnv iniPath $ \env -> do
84 -- First upgrade the Database Schema
85 _ <- runCmdDev env sqlSchema
86
87 -- Then upgrade the triggers
88 _ <- runCmdDev env (initFirstTriggers secret :: Cmd GargError [Int64])
89 _ <- runCmdDev env (contextsTriggers :: Cmd GargError ())
90
91 -- Move nodes to contexts table
92 _ <- runCmdDev env sqlNodes2Contexts
93
94 -- Update the hashes
95 _ <- runCmdDev env sqlUpdateTriggerHash
96
97 ___
98 putStrLn "Uprade done with success !"
99 ___
100 pure ()
101
102
103
104 sqlUpdateTriggerHash :: Cmd'' DevEnv IOException Int64
105 sqlUpdateTriggerHash = do
106 execPGSQuery query ()
107 where
108 query = [sql|
109 UPDATE nodes SET typename = typename;
110 UPDATE contexts SET typename = typename;
111 |]
112
113
114 sqlNodes2Contexts :: Cmd'' DevEnv IOException Int64
115 sqlNodes2Contexts = do
116 execPGSQuery query (toDBid NodeDocument,toDBid NodeContact)
117 where
118 query = [sql|
119 -- WITH docs (id,hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
120 WITH docs AS (SELECT * from nodes WHERE nodes.typename IN (?,?)),
121
122 inserted (id, hash_id) AS (
123 INSERT INTO contexts (hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
124 SELECT d.hash_id,d.typename,d.user_id,NULL,d.name,d.date,d.hyperdata,search FROM docs AS d
125 RETURNING contexts.id, contexts.hash_id
126 ),
127
128 indexed (node_id, context_id) AS (
129 SELECT docs.id, inserted.id from inserted
130 JOIN docs on docs.hash_id = inserted.hash_id
131 ),
132
133 -- nodes_nodes -> nodes_contexts
134 nodes_contexts_query AS (
135 INSERT INTO nodes_contexts (node_id, context_id,score, category)
136 SELECT nn.node1_id,i.context_id,nn.score,nn.category FROM nodes_nodes nn
137 JOIN indexed i ON i.node_id = nn.node2_id
138 ),
139
140 -- nodes_nodes_ngrams -> contexts_nodes_ngrams
141 contexts_nodes_ngrams_query AS (
142 INSERT INTO context_node_ngrams
143 SELECT i.context_id, nnn.node1_id, nnn.ngrams_id, nnn.ngrams_type, nnn.weight FROM node_node_ngrams nnn
144 JOIN indexed i ON i.node_id = nnn.node2_id
145 ),
146
147 ---- nodes_nodes_ngrams2 -> contexts_nodes_ngrams2
148 context_node_ngrams2_query AS (
149 INSERT INTO context_node_ngrams2
150 SELECT i.context_id, nnn2.nodengrams_id, nnn2.weight FROM node_node_ngrams2 nnn2
151 JOIN indexed i ON i.node_id = nnn2.node_id
152 )
153
154 -- WITH CASCADE it should update others tables
155 DELETE FROM nodes n
156 USING indexed i WHERE i.node_id = n.id
157 ;
158
159 UPDATE contexts SET parent_id = id;
160
161
162 |]
163
164
165
166
167
168 sqlSchema :: Cmd'' DevEnv IOException Int64
169 sqlSchema = do
170 execPGSQuery query ()
171 where
172 query = [sql|
173 -- TODO typename -> type_id
174 CREATE TABLE public.contexts (
175 id SERIAL,
176 hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
177 typename INTEGER NOT NULL,
178 user_id INTEGER NOT NULL,
179 parent_id INTEGER REFERENCES public.contexts(id) ON DELETE CASCADE ,
180 name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
181 date TIMESTAMP with time zone DEFAULT now() NOT NULL,
182 hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
183 search tsvector,
184 PRIMARY KEY (id),
185 FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
186 );
187 ALTER TABLE public.contexts OWNER TO gargantua;
188
189
190
191 -- To attach contexts to a Corpus
192 CREATE TABLE public.nodes_contexts (
193 node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
194 context_id INTEGER NOT NULL REFERENCES public.contexts(id) ON DELETE CASCADE,
195 score REAL ,
196 category INTEGER ,
197 PRIMARY KEY (node_id, context_id)
198 );
199 ALTER TABLE public.nodes_contexts OWNER TO gargantua;
200
201 ---------------------------------------------------------------
202 CREATE TABLE public.context_node_ngrams (
203 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
204 node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
205 ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
206 ngrams_type INTEGER ,
207 weight double precision,
208 PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
209 );
210
211
212 ALTER TABLE public.context_node_ngrams OWNER TO gargantua;
213
214 CREATE TABLE public.context_node_ngrams2 (
215 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
216 nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
217 weight double precision,
218 PRIMARY KEY (context_id, nodengrams_id)
219 );
220 ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua;
221
222
223
224 CREATE INDEX ON public.contexts USING gin (hyperdata);
225 CREATE INDEX ON public.contexts USING btree (user_id, typename, parent_id);
226 CREATE INDEX ON public.contexts USING btree (id, typename, date ASC);
227 CREATE INDEX ON public.contexts USING btree (id, typename, date DESC);
228 CREATE INDEX ON public.contexts USING btree (typename, id);
229 CREATE UNIQUE INDEX ON public.contexts USING btree (hash_id);
230
231
232 -- To make the links between Corpus Node and its contexts
233 CREATE UNIQUE INDEX ON public.nodes_contexts USING btree (node_id, context_id);
234 CREATE INDEX ON public.nodes_contexts USING btree (node_id, context_id, category);
235
236
237 ------------------------------------------------------------------------
238 CREATE UNIQUE INDEX ON public.context_node_ngrams USING btree (context_id, node_id, ngrams_id, ngrams_type);
239 CREATE INDEX ON public.context_node_ngrams USING btree (context_id, node_id);
240 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_id, node_id);
241 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_type);
242
243 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id);
244 CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id);
245 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id);
246
247 DROP TABLE if EXISTS public.node_nodengrams_nodengrams;
248
249 DROP TRIGGER if EXISTS trigger_count_delete2 ON nodes_nodes;
250 DROP TRIGGER if EXISTS trigger_count_update_add ON nodes_nodes;
251 DROP TRIGGER if EXISTS trigger_delete_count ON nodes_nodes;
252 DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
253
254
255 -- Indexes needed to speed up the deletes
256 -- Trigger for constraint node_ngrams_node_id_fkey
257 CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
258
259 -- Trigger for constraint node_node_ngrams2_node_id_fkey
260 CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
261
262 -- Trigger for constraint node_node_ngrams_node1_id_fkey
263 CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
264
265 -- Trigger for constraint node_node_ngrams_node2_id_fkey
266 CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
267
268 -- Trigger for constraint nodes_nodes_node1_id_fkey
269 CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
270 -- Trigger for constraint nodes_nodes_node2_id_fkey
271 CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
272
273 -- Trigger for constraint nodes_parent_id_fkey
274 CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
275
276 -- Trigger for constraint rights_node_id_fkey
277 CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
278
279 -- Trigger for constraint nodes_contexts_node_id_fkey
280 CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
281
282 -- Trigger for constraint context_node_ngrams_node_id_fkey
283 CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);
284
285 |]
286
287
288