3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE Strict #-}
15 {-# LANGUAGE QuasiQuotes #-}
20 import Data.Either (Either(..))
21 import Database.PostgreSQL.Simple.SqlQQ (sql)
22 import GHC.IO.Exception (IOException)
23 import Gargantext.API.Admin.EnvTypes (DevEnv)
24 import Gargantext.API.Dev (withDevEnv, runCmdDev)
25 import Gargantext.API.Node () -- instances only
26 import Gargantext.API.Prelude (GargError)
27 import Gargantext.API.Ngrams.Tools (migrateFromDirToDb)
28 import Gargantext.Core (HasDBid(toDBid))
29 import Gargantext.Core.Types.Individu (User(..))
30 import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
31 import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
32 import Gargantext.Database.Admin.Trigger.Init
33 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
34 import Gargantext.Database.Admin.Types.Node (NodeType(NodeDocument, NodeContact))
35 import Gargantext.Database.Prelude (Cmd'', Cmd, execPGSQuery)
36 import Gargantext.Prelude
37 import Gargantext.Prelude.Config (GargConfig(..), readConfig)
38 import Prelude (getLine)
39 import System.Environment (getArgs)
40 import qualified Data.List as List (cycle, concat, take, unlines)
51 putStrLn "GarganText upgrade to version 0.0.6"
54 params@[iniPath] <- getArgs
55 _ <- if length params /= 1
56 then panic "Usage: ./gargantext-upgrade gargantext.ini"
59 putStrLn $ List.unlines
60 [ "Your Database defined in gargantext.ini will be upgraded."
61 , "We stronlgy recommend you to make a backup using pg_dump."
63 , "Press ENTER if you want to continue, CTRL+C if you want to stop."
68 cfg <- readConfig iniPath
69 let _secret = _gc_secretkey cfg
71 withDevEnv iniPath $ \env -> do
72 -- First upgrade the Database Schema
73 _ <- runCmdDev env (migrateFromDirToDb :: Cmd GargError ())
76 putStrLn "Uprade done with success !"
82 sqlUpdateTriggerHash :: Cmd'' DevEnv IOException Int64
83 sqlUpdateTriggerHash = do
87 UPDATE nodes SET typename = typename;
88 UPDATE contexts SET typename = typename;
92 sqlNodes2Contexts :: Cmd'' DevEnv IOException Int64
93 sqlNodes2Contexts = do
94 execPGSQuery query (toDBid NodeDocument,toDBid NodeContact)
97 -- WITH docs (id,hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
98 WITH docs AS (SELECT * from nodes WHERE nodes.typename IN (?,?)),
100 inserted (id, hash_id) AS (
101 INSERT INTO contexts (hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
102 SELECT d.hash_id,d.typename,d.user_id,NULL,d.name,d.date,d.hyperdata,search FROM docs AS d
103 RETURNING contexts.id, contexts.hash_id
106 indexed (node_id, context_id) AS (
107 SELECT docs.id, inserted.id from inserted
108 JOIN docs on docs.hash_id = inserted.hash_id
111 -- nodes_nodes -> nodes_contexts
112 nodes_contexts_query AS (
113 INSERT INTO nodes_contexts (node_id, context_id,score, category)
114 SELECT nn.node1_id,i.context_id,nn.score,nn.category FROM nodes_nodes nn
115 JOIN indexed i ON i.node_id = nn.node2_id
118 -- nodes_nodes_ngrams -> contexts_nodes_ngrams
119 contexts_nodes_ngrams_query AS (
120 INSERT INTO context_node_ngrams
121 SELECT i.context_id, nnn.node1_id, nnn.ngrams_id, nnn.ngrams_type, nnn.weight FROM node_node_ngrams nnn
122 JOIN indexed i ON i.node_id = nnn.node2_id
125 ---- nodes_nodes_ngrams2 -> contexts_nodes_ngrams2
126 context_node_ngrams2_query AS (
127 INSERT INTO context_node_ngrams2
128 SELECT i.context_id, nnn2.nodengrams_id, nnn2.weight FROM node_node_ngrams2 nnn2
129 JOIN indexed i ON i.node_id = nnn2.node_id
132 -- WITH CASCADE it should update others tables
134 USING indexed i WHERE i.node_id = n.id
137 UPDATE contexts SET parent_id = id;
146 sqlSchema :: Cmd'' DevEnv IOException Int64
148 execPGSQuery query ()
151 -- TODO typename -> type_id
152 CREATE TABLE public.contexts (
154 hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
155 typename INTEGER NOT NULL,
156 user_id INTEGER NOT NULL,
157 parent_id INTEGER REFERENCES public.contexts(id) ON DELETE CASCADE ,
158 name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
159 date TIMESTAMP with time zone DEFAULT now() NOT NULL,
160 hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
163 FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
165 ALTER TABLE public.contexts OWNER TO gargantua;
169 -- To attach contexts to a Corpus
170 CREATE TABLE public.nodes_contexts (
171 node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
172 context_id INTEGER NOT NULL REFERENCES public.contexts(id) ON DELETE CASCADE,
175 PRIMARY KEY (node_id, context_id)
177 ALTER TABLE public.nodes_contexts OWNER TO gargantua;
179 ---------------------------------------------------------------
180 CREATE TABLE public.context_node_ngrams (
181 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
182 node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
183 ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
184 ngrams_type INTEGER ,
185 weight double precision,
186 PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
190 ALTER TABLE public.context_node_ngrams OWNER TO gargantua;
192 CREATE TABLE public.context_node_ngrams2 (
193 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
194 nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
195 weight double precision,
196 PRIMARY KEY (context_id, nodengrams_id)
198 ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua;
202 CREATE INDEX ON public.contexts USING gin (hyperdata);
203 CREATE INDEX ON public.contexts USING btree (user_id, typename, parent_id);
204 CREATE INDEX ON public.contexts USING btree (id, typename, date ASC);
205 CREATE INDEX ON public.contexts USING btree (id, typename, date DESC);
206 CREATE INDEX ON public.contexts USING btree (typename, id);
207 CREATE UNIQUE INDEX ON public.contexts USING btree (hash_id);
210 -- To make the links between Corpus Node and its contexts
211 CREATE UNIQUE INDEX ON public.nodes_contexts USING btree (node_id, context_id);
212 CREATE INDEX ON public.nodes_contexts USING btree (node_id, context_id, category);
215 ------------------------------------------------------------------------
216 CREATE UNIQUE INDEX ON public.context_node_ngrams USING btree (context_id, node_id, ngrams_id, ngrams_type);
217 CREATE INDEX ON public.context_node_ngrams USING btree (context_id, node_id);
218 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_id, node_id);
219 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_type);
221 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id);
222 CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id);
223 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id);
225 DROP TABLE if EXISTS public.node_nodengrams_nodengrams;
227 DROP TRIGGER if EXISTS trigger_count_delete2 ON nodes_nodes;
228 DROP TRIGGER if EXISTS trigger_count_update_add ON nodes_nodes;
229 DROP TRIGGER if EXISTS trigger_delete_count ON nodes_nodes;
230 DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
233 -- Indexes needed to speed up the deletes
234 -- Trigger for constraint node_ngrams_node_id_fkey
235 CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
237 -- Trigger for constraint node_node_ngrams2_node_id_fkey
238 CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
240 -- Trigger for constraint node_node_ngrams_node1_id_fkey
241 CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
243 -- Trigger for constraint node_node_ngrams_node2_id_fkey
244 CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
246 -- Trigger for constraint nodes_nodes_node1_id_fkey
247 CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
248 -- Trigger for constraint nodes_nodes_node2_id_fkey
249 CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
251 -- Trigger for constraint nodes_parent_id_fkey
252 CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
254 -- Trigger for constraint rights_node_id_fkey
255 CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
257 -- Trigger for constraint nodes_contexts_node_id_fkey
258 CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
260 -- Trigger for constraint context_node_ngrams_node_id_fkey
261 CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);