]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Cooc.hs
[TREE] SQL query.
[gargantext.git] / src / Gargantext / Database / Cooc.hs
1 {-|
2 Module : Gargantext.Database.Cooc
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE NoImplicitPrelude #-}
13 --{-# LANGUAGE OverloadedStrings #-}
14
15 {-# LANGUAGE QuasiQuotes #-}
16
17 module Gargantext.Database.Cooc where
18
19 import Control.Monad ((>>=))
20 import Database.PostgreSQL.Simple
21 import Database.PostgreSQL.Simple.SqlQQ
22
23 import Gargantext.Prelude
24 import Gargantext (connectGargandb)
25
26 type CorpusId = Int
27 type MainListId = Int
28 type GroupListId = Int
29
30 coocTest :: IO [(Int, Int, Int)]
31 coocTest = connectGargandb "gargantext.ini"
32 >>= \conn -> dBcooc conn 421968 446602 446599
33
34 dBcooc :: Connection -> CorpusId -> MainListId -> GroupListId -> IO [(Int, Int, Int)]
35 dBcooc conn corpus mainList groupList = query conn [sql|
36 set work_mem='1GB';
37
38 --EXPLAIN ANALYZE
39 WITH COOC as (
40 SELECT
41 COALESCE(grA.ngram1_id, wlA.ngram_id) as ngA,
42 COALESCE(grB.ngram1_id, wlB.ngram_id) as ngB,
43 COUNT(*) AS score
44 FROM
45 nodes AS n
46 -- / -- X Y
47 -- SQL graph for getting the cooccurrences
48
49 -- STEP 1: X axis of the matrix
50 INNER JOIN nodes_ngrams
51 AS ngA ON ngA.node_id = n.id
52 -- \--> get the occurrences node/ngram of the corpus
53
54 INNER JOIN nodes_ngrams
55 AS wlA ON ngA.ngram_id = wlA.ngram_id
56 AND wlA.node_id = ?
57 -- \--> filter with white/main list (typename 7)
58
59 LEFT JOIN nodes_ngrams_ngrams
60 AS grA ON wlA.ngram_id = grA.ngram1_id
61 AND grA.node_id = ?
62 -- \--> adding (joining) ngrams that are grouped (typename 6)
63 LEFT JOIN nodes_ngrams
64 AS wlAA ON grA.ngram2_id = wlAA.ngram_id
65 AND wlAA.node_id = wlA.node_id
66 -- \--> adding (joining) ngrams that are not grouped
67 --LEFT JOIN ngrams AS wlAA ON grA.ngram2_id = wlAA.id
68 -- \--> for joining all synonyms even if they are not in the main list (white list)
69
70
71 -- STEP 2: Y axi of the matrix
72 INNER JOIN nodes_ngrams
73 AS ngB ON ngB.node_id = n.id
74 -- \--> get the occurrences node/ngram of the corpus
75
76 INNER JOIN nodes_ngrams
77 AS wlB ON ngB.ngram_id = wlB.ngram_id
78 AND wlB.node_id = ?
79 -- \--> filter with white/main list
80
81 LEFT JOIN nodes_ngrams_ngrams
82 AS grB ON wlB.ngram_id = grB.ngram1_id
83 AND grB.node_id = ?
84 -- \--> adding (joining) ngrams that are grouped
85 LEFT JOIN nodes_ngrams
86 AS wlBB ON grB.ngram2_id = wlBB.ngram_id
87 AND wlBB.node_id = wlB.node_id
88 -- \--> adding (joining) ngrams that are not grouped
89
90 -- LEFT JOIN ngrams AS wlBB ON grB.ngram2_id = wlBB.id
91 -- \--> for joining all synonyms even if they are not in the main list (white list)
92
93 WHERE
94 n.typename = 4
95 AND n.parent_id = ?
96 GROUP BY 1,2
97 -- ==
98 -- GROUP BY ngA, ngB
99 )
100
101 SELECT ngA, ngB, score
102 FROM COOC --> from the query above
103
104 WHERE score >= 3
105 AND
106 ngA <= ngB
107 |] (mainList, groupList, mainList, groupList, corpus)
108
109