3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE Strict #-}
15 {-# LANGUAGE QuasiQuotes #-}
20 import Data.Either (Either(..))
21 import Database.PostgreSQL.Simple.SqlQQ (sql)
22 import GHC.IO.Exception (IOException)
23 import Gargantext.API.Admin.EnvTypes (DevEnv)
24 import Gargantext.API.Dev (withDevEnv, runCmdDev)
25 import Gargantext.API.Node () -- instances only
26 import Gargantext.API.Prelude (GargError)
27 import Gargantext.API.Ngrams.Tools (migrateFromDirToDb)
28 import Gargantext.Core (HasDBid(toDBid))
29 import Gargantext.Core.Types.Individu (User(..))
30 import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
31 import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
32 import Gargantext.Database.Admin.Trigger.Init
33 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
34 import Gargantext.Database.Admin.Types.Node (NodeType(NodeDocument, NodeContact))
35 import Gargantext.Database.Prelude (Cmd'', Cmd, execPGSQuery)
36 import Gargantext.Database.Query.Table.Node (getOrMkList)
37 import Gargantext.Prelude
38 import Gargantext.Prelude.Config (GargConfig(..), readConfig)
39 import Prelude (getLine)
40 import System.Environment (getArgs)
41 import qualified Data.List as List (cycle, concat, take, unlines)
52 putStrLn "GarganText upgrade to version 0.0.6"
55 params@[iniPath] <- getArgs
56 _ <- if length params /= 1
57 then panic "Usage: ./gargantext-upgrade gargantext.ini"
60 putStrLn $ List.unlines
61 [ "Your Database defined in gargantext.ini will be upgraded."
62 , "We stronlgy recommend you to make a backup using pg_dump."
64 , "Press ENTER if you want to continue, CTRL+C if you want to stop."
69 cfg <- readConfig iniPath
70 let secret = _gc_secretkey cfg
72 withDevEnv iniPath $ \env -> do
73 -- First upgrade the Database Schema
74 _ <- runCmdDev env (migrateFromDirToDb :: Cmd GargError ())
77 putStrLn "Uprade done with success !"
83 sqlUpdateTriggerHash :: Cmd'' DevEnv IOException Int64
84 sqlUpdateTriggerHash = do
88 UPDATE nodes SET typename = typename;
89 UPDATE contexts SET typename = typename;
93 sqlNodes2Contexts :: Cmd'' DevEnv IOException Int64
94 sqlNodes2Contexts = do
95 execPGSQuery query (toDBid NodeDocument,toDBid NodeContact)
98 -- WITH docs (id,hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
99 WITH docs AS (SELECT * from nodes WHERE nodes.typename IN (?,?)),
101 inserted (id, hash_id) AS (
102 INSERT INTO contexts (hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
103 SELECT d.hash_id,d.typename,d.user_id,NULL,d.name,d.date,d.hyperdata,search FROM docs AS d
104 RETURNING contexts.id, contexts.hash_id
107 indexed (node_id, context_id) AS (
108 SELECT docs.id, inserted.id from inserted
109 JOIN docs on docs.hash_id = inserted.hash_id
112 -- nodes_nodes -> nodes_contexts
113 nodes_contexts_query AS (
114 INSERT INTO nodes_contexts (node_id, context_id,score, category)
115 SELECT nn.node1_id,i.context_id,nn.score,nn.category FROM nodes_nodes nn
116 JOIN indexed i ON i.node_id = nn.node2_id
119 -- nodes_nodes_ngrams -> contexts_nodes_ngrams
120 contexts_nodes_ngrams_query AS (
121 INSERT INTO context_node_ngrams
122 SELECT i.context_id, nnn.node1_id, nnn.ngrams_id, nnn.ngrams_type, nnn.weight FROM node_node_ngrams nnn
123 JOIN indexed i ON i.node_id = nnn.node2_id
126 ---- nodes_nodes_ngrams2 -> contexts_nodes_ngrams2
127 context_node_ngrams2_query AS (
128 INSERT INTO context_node_ngrams2
129 SELECT i.context_id, nnn2.nodengrams_id, nnn2.weight FROM node_node_ngrams2 nnn2
130 JOIN indexed i ON i.node_id = nnn2.node_id
133 -- WITH CASCADE it should update others tables
135 USING indexed i WHERE i.node_id = n.id
138 UPDATE contexts SET parent_id = id;
147 sqlSchema :: Cmd'' DevEnv IOException Int64
149 execPGSQuery query ()
152 -- TODO typename -> type_id
153 CREATE TABLE public.contexts (
155 hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
156 typename INTEGER NOT NULL,
157 user_id INTEGER NOT NULL,
158 parent_id INTEGER REFERENCES public.contexts(id) ON DELETE CASCADE ,
159 name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
160 date TIMESTAMP with time zone DEFAULT now() NOT NULL,
161 hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
164 FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
166 ALTER TABLE public.contexts OWNER TO gargantua;
170 -- To attach contexts to a Corpus
171 CREATE TABLE public.nodes_contexts (
172 node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
173 context_id INTEGER NOT NULL REFERENCES public.contexts(id) ON DELETE CASCADE,
176 PRIMARY KEY (node_id, context_id)
178 ALTER TABLE public.nodes_contexts OWNER TO gargantua;
180 ---------------------------------------------------------------
181 CREATE TABLE public.context_node_ngrams (
182 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
183 node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
184 ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
185 ngrams_type INTEGER ,
186 weight double precision,
187 PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
191 ALTER TABLE public.context_node_ngrams OWNER TO gargantua;
193 CREATE TABLE public.context_node_ngrams2 (
194 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
195 nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
196 weight double precision,
197 PRIMARY KEY (context_id, nodengrams_id)
199 ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua;
203 CREATE INDEX ON public.contexts USING gin (hyperdata);
204 CREATE INDEX ON public.contexts USING btree (user_id, typename, parent_id);
205 CREATE INDEX ON public.contexts USING btree (id, typename, date ASC);
206 CREATE INDEX ON public.contexts USING btree (id, typename, date DESC);
207 CREATE INDEX ON public.contexts USING btree (typename, id);
208 CREATE UNIQUE INDEX ON public.contexts USING btree (hash_id);
211 -- To make the links between Corpus Node and its contexts
212 CREATE UNIQUE INDEX ON public.nodes_contexts USING btree (node_id, context_id);
213 CREATE INDEX ON public.nodes_contexts USING btree (node_id, context_id, category);
216 ------------------------------------------------------------------------
217 CREATE UNIQUE INDEX ON public.context_node_ngrams USING btree (context_id, node_id, ngrams_id, ngrams_type);
218 CREATE INDEX ON public.context_node_ngrams USING btree (context_id, node_id);
219 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_id, node_id);
220 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_type);
222 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id);
223 CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id);
224 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id);
226 DROP TABLE if EXISTS public.node_nodengrams_nodengrams;
228 DROP TRIGGER if EXISTS trigger_count_delete2 ON nodes_nodes;
229 DROP TRIGGER if EXISTS trigger_count_update_add ON nodes_nodes;
230 DROP TRIGGER if EXISTS trigger_delete_count ON nodes_nodes;
231 DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
234 -- Indexes needed to speed up the deletes
235 -- Trigger for constraint node_ngrams_node_id_fkey
236 CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
238 -- Trigger for constraint node_node_ngrams2_node_id_fkey
239 CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
241 -- Trigger for constraint node_node_ngrams_node1_id_fkey
242 CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
244 -- Trigger for constraint node_node_ngrams_node2_id_fkey
245 CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
247 -- Trigger for constraint nodes_nodes_node1_id_fkey
248 CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
249 -- Trigger for constraint nodes_nodes_node2_id_fkey
250 CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
252 -- Trigger for constraint nodes_parent_id_fkey
253 CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
255 -- Trigger for constraint rights_node_id_fkey
256 CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
258 -- Trigger for constraint nodes_contexts_node_id_fkey
259 CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
261 -- Trigger for constraint context_node_ngrams_node_id_fkey
262 CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);