]> Git — Sourcephile - gargantext.git/blob - bin/gargantext-upgrade/Main.hs
[ngrams] first version of ngrams counting in docs
[gargantext.git] / bin / gargantext-upgrade / Main.hs
1 {-|
2 Module : Main.hs
3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Import a corpus binary.
11
12 -}
13
14 {-# LANGUAGE Strict #-}
15 {-# LANGUAGE QuasiQuotes #-}
16
17 module Main where
18
19
20 import Data.Either (Either(..))
21 import Database.PostgreSQL.Simple.SqlQQ (sql)
22 import GHC.IO.Exception (IOException)
23 import Gargantext.API.Admin.EnvTypes (DevEnv)
24 import Gargantext.API.Dev (withDevEnv, runCmdDev)
25 import Gargantext.API.Node () -- instances only
26 import Gargantext.API.Prelude (GargError)
27 import Gargantext.API.Ngrams.Tools (migrateFromDirToDb)
28 import Gargantext.Core (HasDBid(toDBid))
29 import Gargantext.Core.Types.Individu (User(..))
30 import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
31 import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
32 import Gargantext.Database.Admin.Trigger.Init
33 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
34 import Gargantext.Database.Admin.Types.Node (NodeType(NodeDocument, NodeContact))
35 import Gargantext.Database.Prelude (Cmd'', Cmd, execPGSQuery)
36 import Gargantext.Prelude
37 import Gargantext.Prelude.Config (GargConfig(..), readConfig)
38 import Prelude (getLine)
39 import System.Environment (getArgs)
40 import qualified Data.List as List (cycle, concat, take, unlines)
41
42 main :: IO ()
43 main = do
44
45 let ___ = putStrLn
46 $ List.concat
47 $ List.take 72
48 $ List.cycle ["_"]
49
50 ___
51 putStrLn "GarganText upgrade to version 0.0.6"
52 ___
53
54 params@[iniPath] <- getArgs
55 _ <- if length params /= 1
56 then panic "Usage: ./gargantext-upgrade gargantext.ini"
57 else pure ()
58
59 putStrLn $ List.unlines
60 [ "Your Database defined in gargantext.ini will be upgraded."
61 , "We stronlgy recommend you to make a backup using pg_dump."
62 , ""
63 , "Press ENTER if you want to continue, CTRL+C if you want to stop."
64 ]
65
66 _ok <- getLine
67
68 cfg <- readConfig iniPath
69 let _secret = _gc_secretkey cfg
70
71 withDevEnv iniPath $ \env -> do
72 -- First upgrade the Database Schema
73 _ <- runCmdDev env (migrateFromDirToDb :: Cmd GargError ())
74
75 ___
76 putStrLn "Uprade done with success !"
77 ___
78 pure ()
79
80
81 {-
82 sqlUpdateTriggerHash :: Cmd'' DevEnv IOException Int64
83 sqlUpdateTriggerHash = do
84 execPGSQuery query ()
85 where
86 query = [sql|
87 UPDATE nodes SET typename = typename;
88 UPDATE contexts SET typename = typename;
89 |]
90
91
92 sqlNodes2Contexts :: Cmd'' DevEnv IOException Int64
93 sqlNodes2Contexts = do
94 execPGSQuery query (toDBid NodeDocument,toDBid NodeContact)
95 where
96 query = [sql|
97 -- WITH docs (id,hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
98 WITH docs AS (SELECT * from nodes WHERE nodes.typename IN (?,?)),
99
100 inserted (id, hash_id) AS (
101 INSERT INTO contexts (hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
102 SELECT d.hash_id,d.typename,d.user_id,NULL,d.name,d.date,d.hyperdata,search FROM docs AS d
103 RETURNING contexts.id, contexts.hash_id
104 ),
105
106 indexed (node_id, context_id) AS (
107 SELECT docs.id, inserted.id from inserted
108 JOIN docs on docs.hash_id = inserted.hash_id
109 ),
110
111 -- nodes_nodes -> nodes_contexts
112 nodes_contexts_query AS (
113 INSERT INTO nodes_contexts (node_id, context_id,score, category)
114 SELECT nn.node1_id,i.context_id,nn.score,nn.category FROM nodes_nodes nn
115 JOIN indexed i ON i.node_id = nn.node2_id
116 ),
117
118 -- nodes_nodes_ngrams -> contexts_nodes_ngrams
119 contexts_nodes_ngrams_query AS (
120 INSERT INTO context_node_ngrams
121 SELECT i.context_id, nnn.node1_id, nnn.ngrams_id, nnn.ngrams_type, nnn.weight FROM node_node_ngrams nnn
122 JOIN indexed i ON i.node_id = nnn.node2_id
123 ),
124
125 ---- nodes_nodes_ngrams2 -> contexts_nodes_ngrams2
126 context_node_ngrams2_query AS (
127 INSERT INTO context_node_ngrams2
128 SELECT i.context_id, nnn2.nodengrams_id, nnn2.weight FROM node_node_ngrams2 nnn2
129 JOIN indexed i ON i.node_id = nnn2.node_id
130 )
131
132 -- WITH CASCADE it should update others tables
133 DELETE FROM nodes n
134 USING indexed i WHERE i.node_id = n.id
135 ;
136
137 UPDATE contexts SET parent_id = id;
138
139
140 |]
141
142
143
144
145
146 sqlSchema :: Cmd'' DevEnv IOException Int64
147 sqlSchema = do
148 execPGSQuery query ()
149 where
150 query = [sql|
151 -- TODO typename -> type_id
152 CREATE TABLE public.contexts (
153 id SERIAL,
154 hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
155 typename INTEGER NOT NULL,
156 user_id INTEGER NOT NULL,
157 parent_id INTEGER REFERENCES public.contexts(id) ON DELETE CASCADE ,
158 name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
159 date TIMESTAMP with time zone DEFAULT now() NOT NULL,
160 hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
161 search tsvector,
162 PRIMARY KEY (id),
163 FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
164 );
165 ALTER TABLE public.contexts OWNER TO gargantua;
166
167
168
169 -- To attach contexts to a Corpus
170 CREATE TABLE public.nodes_contexts (
171 node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
172 context_id INTEGER NOT NULL REFERENCES public.contexts(id) ON DELETE CASCADE,
173 score REAL ,
174 category INTEGER ,
175 PRIMARY KEY (node_id, context_id)
176 );
177 ALTER TABLE public.nodes_contexts OWNER TO gargantua;
178
179 ---------------------------------------------------------------
180 CREATE TABLE public.context_node_ngrams (
181 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
182 node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
183 ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
184 ngrams_type INTEGER ,
185 weight double precision,
186 PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
187 );
188
189
190 ALTER TABLE public.context_node_ngrams OWNER TO gargantua;
191
192 CREATE TABLE public.context_node_ngrams2 (
193 context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
194 nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
195 weight double precision,
196 PRIMARY KEY (context_id, nodengrams_id)
197 );
198 ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua;
199
200
201
202 CREATE INDEX ON public.contexts USING gin (hyperdata);
203 CREATE INDEX ON public.contexts USING btree (user_id, typename, parent_id);
204 CREATE INDEX ON public.contexts USING btree (id, typename, date ASC);
205 CREATE INDEX ON public.contexts USING btree (id, typename, date DESC);
206 CREATE INDEX ON public.contexts USING btree (typename, id);
207 CREATE UNIQUE INDEX ON public.contexts USING btree (hash_id);
208
209
210 -- To make the links between Corpus Node and its contexts
211 CREATE UNIQUE INDEX ON public.nodes_contexts USING btree (node_id, context_id);
212 CREATE INDEX ON public.nodes_contexts USING btree (node_id, context_id, category);
213
214
215 ------------------------------------------------------------------------
216 CREATE UNIQUE INDEX ON public.context_node_ngrams USING btree (context_id, node_id, ngrams_id, ngrams_type);
217 CREATE INDEX ON public.context_node_ngrams USING btree (context_id, node_id);
218 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_id, node_id);
219 CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_type);
220
221 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id);
222 CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id);
223 CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id);
224
225 DROP TABLE if EXISTS public.node_nodengrams_nodengrams;
226
227 DROP TRIGGER if EXISTS trigger_count_delete2 ON nodes_nodes;
228 DROP TRIGGER if EXISTS trigger_count_update_add ON nodes_nodes;
229 DROP TRIGGER if EXISTS trigger_delete_count ON nodes_nodes;
230 DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
231
232
233 -- Indexes needed to speed up the deletes
234 -- Trigger for constraint node_ngrams_node_id_fkey
235 CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
236
237 -- Trigger for constraint node_node_ngrams2_node_id_fkey
238 CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
239
240 -- Trigger for constraint node_node_ngrams_node1_id_fkey
241 CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
242
243 -- Trigger for constraint node_node_ngrams_node2_id_fkey
244 CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
245
246 -- Trigger for constraint nodes_nodes_node1_id_fkey
247 CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
248 -- Trigger for constraint nodes_nodes_node2_id_fkey
249 CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
250
251 -- Trigger for constraint nodes_parent_id_fkey
252 CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
253
254 -- Trigger for constraint rights_node_id_fkey
255 CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
256
257 -- Trigger for constraint nodes_contexts_node_id_fkey
258 CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
259
260 -- Trigger for constraint context_node_ngrams_node_id_fkey
261 CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);
262
263 |]
264
265 -}