3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE Strict #-}
15 {-# LANGUAGE QuasiQuotes #-}
20 import Data.Either (Either(..))
21 import Database.PostgreSQL.Simple.SqlQQ (sql)
22 import GHC.IO.Exception (IOException)
23 import Gargantext.API.Admin.EnvTypes (DevEnv)
24 import Gargantext.API.Dev (withDevEnv, runCmdDev)
25 import Gargantext.API.Node () -- instances only
26 import Gargantext.API.Prelude (GargError)
27 import Gargantext.Core (HasDBid(toDBid))
28 import Gargantext.Core.Types.Individu (User(..))
29 import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
30 import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
31 import Gargantext.Database.Admin.Trigger.Init
32 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
33 import Gargantext.Database.Admin.Types.Node (NodeType(NodeDocument, NodeContact))
34 import Gargantext.Database.Prelude (Cmd'', Cmd, execPGSQuery)
35 import Gargantext.Database.Query.Table.Node (getOrMkList)
36 import Gargantext.Prelude
37 import Gargantext.Prelude.Config (GargConfig(..), readConfig)
38 import Prelude (getLine)
39 import System.Environment (getArgs)
40 import qualified Data.List as List (cycle, concat, take, unlines)
45 let ___ = putStrLn $ List.concat $ List.take 72 $ List.cycle ["_"]
48 putStrLn "GarganText upgrade to version 0.0.5"
51 params@[iniPath] <- getArgs
52 _ <- if length params /= 1
53 then panic "Usage: ./gargantext-upgrade gargantext.ini"
56 putStrLn $ List.unlines
57 [ "Your Database defined in gargantext.ini will be upgraded."
58 , "We stronlgy recommend you to make a backup using pg_dump."
60 , "If you encounter issues, please report your bugs here:"
61 , "https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/101"
63 , "Press ENTER if you want to continue, CTRL+C if you want to stop."
68 cfg <- readConfig iniPath
69 let secret = _gc_secretkey cfg
72 contextsTriggers :: Cmd GargError ()
74 (masterUserId, _masterRootId, masterCorpusId)
75 <- getOrMk_RootWithCorpus (UserName userMaster)
76 (Left corpusMasterName)
77 (Nothing :: Maybe HyperdataCorpus)
78 masterListId <- getOrMkList masterCorpusId masterUserId
79 _triggers <- initLastTriggers masterListId
83 withDevEnv iniPath $ \env -> do
84 -- First upgrade the Database Schema
85 _ <- runCmdDev env sqlSchema
87 -- Then upgrade the triggers
88 _ <- runCmdDev env (initFirstTriggers secret :: Cmd GargError [Int64])
89 _ <- runCmdDev env (contextsTriggers :: Cmd GargError ())
91 -- Move nodes to contexts table
92 _ <- runCmdDev env sqlNodes2Contexts
95 _ <- runCmdDev env sqlUpdateTriggerHash
98 putStrLn "Uprade done with success !"
104 sqlUpdateTriggerHash :: Cmd'' DevEnv IOException Int64
105 sqlUpdateTriggerHash = do
106 execPGSQuery query ()
109 UPDATE nodes SET typename = typename;
110 UPDATE contexts SET typename = typename;
114 sqlNodes2Contexts :: Cmd'' DevEnv IOException Int64
115 sqlNodes2Contexts = do
116 execPGSQuery query (toDBid NodeDocument,toDBid NodeContact)
119 -- WITH docs (id,hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
120 WITH docs AS (SELECT * from nodes WHERE nodes.typename IN (?,?)),
122 inserted (id, hash_id) AS (
123 INSERT INTO contexts (hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
124 SELECT d.hash_id,d.typename,d.user_id,NULL,d.name,d.date,d.hyperdata,search FROM docs AS d
125 RETURNING contexts.id, contexts.hash_id
128 indexed (node_id, context_id) AS (
129 SELECT docs.id, inserted.id from inserted
130 JOIN docs on docs.hash_id = inserted.hash_id
133 -- nodes_nodes -> nodes_contexts
134 nodes_contexts_query AS (
135 INSERT INTO nodes_contexts (node_id, context_id,score, category)
136 SELECT nn.node1_id,i.context_id,nn.score,nn.category FROM nodes_nodes nn
137 JOIN indexed i ON i.node_id = nn.node2_id
140 -- nodes_nodes_ngrams -> contexts_nodes_ngrams
141 contexts_nodes_ngrams_query AS (
142 INSERT INTO context_node_ngrams
143 SELECT i.context_id, nnn.node1_id, nnn.ngrams_id, nnn.ngrams_type, nnn.weight FROM node_node_ngrams nnn
144 JOIN indexed i ON i.node_id = nnn.node2_id
147 ---- nodes_nodes_ngrams2 -> contexts_nodes_ngrams2
148 context_node_ngrams2_query AS (
149 INSERT INTO context_node_ngrams2
150 SELECT i.context_id, nnn2.nodengrams_id, nnn2.weight FROM node_node_ngrams2 nnn2
151 JOIN indexed i ON i.node_id = nnn2.node_id
154 -- WITH CASCADE it should update others tables
156 USING indexed i WHERE i.node_id = n.id
159 UPDATE contexts SET parent_id = id;
168 sqlSchema :: Cmd'' DevEnv IOException Int64
170 execPGSQuery query ()
173 -- TODO typename -> type_id
174 CREATE TABLE public.contexts (
176 hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
177 typename INTEGER NOT NULL,
178 user_id INTEGER NOT NULL,
179 parent_id INTEGER REFERENCES public.contexts(id) ON DELETE CASCADE ,
180 name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
181 date TIMESTAMP with time zone DEFAULT now() NOT NULL,
182 hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
185 FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
187 ALTER TABLE public.contexts OWNER TO gargantua;
191 -- To attach contexts to a Corpus
192 CREATE TABLE public.nodes_contexts (
193 node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
194 context_id INTEGER NOT NULL REFERENCES public.contexts(id) ON DELETE CASCADE,
197 PRIMARY KEY (node_id, context_id)
199 ALTER TABLE public.nodes_contexts OWNER TO gargantua;
201 ---------------------------------------------------------------
202 CREATE TABLE public.context_node_ngrams (
203 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
204 node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
205 ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
206 ngrams_type INTEGER ,
207 weight double precision,
208 PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
212 ALTER TABLE public.context_node_ngrams OWNER TO gargantua;
214 CREATE TABLE public.context_node_ngrams2 (
215 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
216 nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
217 weight double precision,
218 PRIMARY KEY (context_id, nodengrams_id)
220 ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua;
224 CREATE INDEX ON public.contexts USING gin (hyperdata);
225 CREATE INDEX ON public.contexts USING btree (user_id, typename, parent_id);
226 CREATE INDEX ON public.contexts USING btree (id, typename, date ASC);
227 CREATE INDEX ON public.contexts USING btree (id, typename, date DESC);
228 CREATE INDEX ON public.contexts USING btree (typename, id);
229 CREATE UNIQUE INDEX ON public.contexts USING btree (hash_id);
232 -- To make the links between Corpus Node and its contexts
233 CREATE UNIQUE INDEX ON public.nodes_contexts USING btree (node_id, context_id);
234 CREATE INDEX ON public.nodes_contexts USING btree (node_id, context_id, category);
237 ------------------------------------------------------------------------
238 CREATE UNIQUE INDEX ON public.context_node_ngrams USING btree (context_id, node_id, ngrams_id, ngrams_type);
239 CREATE INDEX ON public.context_node_ngrams USING btree (context_id, node_id);
240 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_id, node_id);
241 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_type);
243 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id);
244 CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id);
245 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id);
247 DROP TABLE if EXISTS public.node_nodengrams_nodengrams;
249 DROP TRIGGER if EXISTS trigger_count_delete2 ON nodes_nodes;
250 DROP TRIGGER if EXISTS trigger_count_update_add ON nodes_nodes;
251 DROP TRIGGER if EXISTS trigger_delete_count ON nodes_nodes;
252 DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
255 -- Indexes needed to speed up the deletes
256 -- Trigger for constraint node_ngrams_node_id_fkey
257 CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
259 -- Trigger for constraint node_node_ngrams2_node_id_fkey
260 CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
262 -- Trigger for constraint node_node_ngrams_node1_id_fkey
263 CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
265 -- Trigger for constraint node_node_ngrams_node2_id_fkey
266 CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
268 -- Trigger for constraint nodes_nodes_node1_id_fkey
269 CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
270 -- Trigger for constraint nodes_nodes_node2_id_fkey
271 CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
273 -- Trigger for constraint nodes_parent_id_fkey
274 CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
276 -- Trigger for constraint rights_node_id_fkey
277 CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
279 -- Trigger for constraint nodes_contexts_node_id_fkey
280 CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
282 -- Trigger for constraint context_node_ngrams_node_id_fkey
283 CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);