]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/NodeNgramsNgrams.hs
Merge remote-tracking branch 'origin/dev-ngrams-table' into dev
[gargantext.git] / src / Gargantext / Database / Schema / NodeNgramsNgrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.NodeNgramsNgrams
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 NodeNgramsNgrams table is used to group Ngrams
11 - NodeId :: List Id
12 - NgramId_1, NgramId_2 where all NgramId_2 will be added to NgramId_1
13 - weight: probability of the relation (TODO, fixed to 1 for simple stemming)
14
15 Next Step benchmark:
16 - recursive queries of postgres
17 - group with: https://en.wikipedia.org/wiki/Nested_set_model
18
19 -}
20
21 {-# LANGUAGE Arrows #-}
22 {-# LANGUAGE FlexibleInstances #-}
23 {-# LANGUAGE FunctionalDependencies #-}
24 {-# LANGUAGE MultiParamTypeClasses #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE OverloadedStrings #-}
27 {-# LANGUAGE QuasiQuotes #-}
28 {-# LANGUAGE RankNTypes #-}
29 {-# LANGUAGE TemplateHaskell #-}
30 {-# OPTIONS_GHC -fno-warn-orphans #-}
31
32 module Gargantext.Database.Schema.NodeNgramsNgrams
33 where
34
35 import Control.Lens (view)
36 import Control.Lens.TH (makeLensesWith, abbreviatedFields)
37 import Control.Monad (void)
38 import Control.Monad.IO.Class (liftIO)
39 import Data.Text (Text)
40 import Data.Maybe (Maybe)
41 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
42 import Database.PostgreSQL.Simple.SqlQQ (sql)
43 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
44 import Gargantext.Database.Utils (Cmd, runOpaQuery, execPGSQuery, connection)
45 import Gargantext.Core.Types.Main (ListId)
46 import Gargantext.Prelude
47 import Opaleye
48 import qualified Database.PostgreSQL.Simple as PGS
49
50 data NodeNgramsNgramsPoly node_id ngram1_id ngram2_id weight =
51 NodeNgramsNgrams { _nng_NodeId :: node_id
52 , _nng_Ngram1Id :: ngram1_id
53 , _nng_Ngram2Id :: ngram2_id
54 , _nng_Weight :: weight
55 } deriving (Show)
56
57 type NodeNgramsNgramsWrite =
58 NodeNgramsNgramsPoly (Column PGInt4 )
59 (Column PGInt4 )
60 (Column PGInt4 )
61 (Maybe (Column PGFloat8))
62
63 type NodeNgramsNgramsRead =
64 NodeNgramsNgramsPoly (Column PGInt4 )
65 (Column PGInt4 )
66 (Column PGInt4 )
67 (Column PGFloat8)
68
69 type NodeNgramsNgrams =
70 NodeNgramsNgramsPoly Int
71 Int
72 Int
73 (Maybe Double)
74
75 $(makeAdaptorAndInstance "pNodeNgramsNgrams"
76 ''NodeNgramsNgramsPoly)
77 $(makeLensesWith abbreviatedFields
78 ''NodeNgramsNgramsPoly)
79
80
81 nodeNgramsNgramsTable :: Table NodeNgramsNgramsWrite NodeNgramsNgramsRead
82 nodeNgramsNgramsTable =
83 Table "nodes_ngrams_ngrams"
84 ( pNodeNgramsNgrams NodeNgramsNgrams
85 { _nng_NodeId = required "node_id"
86 , _nng_Ngram1Id = required "ngram1_id"
87 , _nng_Ngram2Id = required "ngram2_id"
88 , _nng_Weight = optional "weight"
89 }
90 )
91
92 queryNodeNgramsNgramsTable :: Query NodeNgramsNgramsRead
93 queryNodeNgramsNgramsTable = queryTable nodeNgramsNgramsTable
94
95 -- | Select NodeNgramsNgrams
96 -- TODO not optimized (get all ngrams without filters)
97 nodeNgramsNgrams :: Cmd err [NodeNgramsNgrams]
98 nodeNgramsNgrams = runOpaQuery queryNodeNgramsNgramsTable
99
100 instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
101 queryRunnerColumnDefault = fieldQueryRunnerColumn
102
103 instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
104 queryRunnerColumnDefault = fieldQueryRunnerColumn
105
106
107 -- TODO: Add option on conflict
108 insertNodeNgramsNgramsNew :: [NodeNgramsNgrams] -> Cmd err Int
109 insertNodeNgramsNgramsNew = insertNodeNgramsNgramsW
110 . map (\(NodeNgramsNgrams n ng1 ng2 maybeWeight) ->
111 NodeNgramsNgrams (pgInt4 n )
112 (pgInt4 ng1)
113 (pgInt4 ng2)
114 (pgDouble <$> maybeWeight)
115 )
116
117 insertNodeNgramsNgramsW :: [NodeNgramsNgramsWrite] -> Cmd err Int
118 insertNodeNgramsNgramsW ns = do
119 c <- view connection
120 liftIO $ fromIntegral <$> runInsertMany c nodeNgramsNgramsTable ns
121
122 ------------------------------------------------------------------------
123 data Action = Del | Add
124
125 type NgramsParent = Text
126 type NgramsChild = Text
127
128
129 ngramsGroup :: Action -> [(ListId, NgramsParent, NgramsChild, Maybe Double)]
130 -> Cmd err ()
131 ngramsGroup _ [] = pure ()
132 ngramsGroup action ngs = runNodeNgramsNgrams q ngs
133 where
134 q = case action of
135 Del -> queryDelNodeNgramsNgrams
136 Add -> queryInsertNodeNgramsNgrams
137
138
139 runNodeNgramsNgrams :: PGS.Query -> [(ListId, NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ()
140 runNodeNgramsNgrams q ngs = void $ execPGSQuery q (PGS.Only $ Values fields ngs')
141 where
142 ngs' = map (\(n,ng1,ng2,w) -> (n,ng1,ng2,maybe 0 identity w)) ngs
143 fields = map (\t -> QualifiedIdentifier Nothing t)
144 ["int4","text","text","float8"]
145
146 --------------------------------------------------------------------
147 -- TODO: on conflict update weight
148 queryInsertNodeNgramsNgrams :: PGS.Query
149 queryInsertNodeNgramsNgrams = [sql|
150 WITH input_rows(nId,ng1,ng2,w) AS (?)
151 INSERT INTO nodes_ngrams_ngrams (node_id,ngram1_id,ngram2_id,weight)
152 SELECT nId,ngrams1.id,ngrams2.id,w FROM input_rows
153 JOIN ngrams ngrams1 ON ngrams1.terms = ng1
154 JOIN ngrams ngrams2 ON ngrams2.terms = ng2
155 ON CONFLICT (node_id,ngram1_id,ngram2_id) DO NOTHING -- unique index created here
156 |]
157
158 queryDelNodeNgramsNgrams :: PGS.Query
159 queryDelNodeNgramsNgrams = [sql|
160 WITH input(nId,ng1,ng2,w) AS (?)
161 DELETE FROM nodes_ngrams_ngrams nnn
162 JOIN ngrams ngrams1 ON ngrams.terms = ng1
163 JOIN ngrams ngrams2 ON ngrams.terms = ng2
164 WHERE nnn.node_id = input.nId
165 AND nnn.ngram1_id = ngrams1.id
166 AND nnn.ngram2_id = ngrams2.id
167 ;)
168 |]
169