]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/NodeNgramsNgrams.hs
Merge remote-tracking branch 'origin/dev-ngrams-table' into dev
[gargantext.git] / src / Gargantext / Database / Schema / NodeNgramsNgrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.NodeNgramsNgrams
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 NodeNgramsNgrams table is used to group Ngrams
11 - NodeId :: List Id
12 - NgramId_1, NgramId_2 where all NgramId_2 will be added to NgramId_1
13 - weight: probability of the relation (TODO, fixed to 1 for simple stemming)
14
15 Next Step benchmark:
16 - recursive queries of postgres
17 - group with: https://en.wikipedia.org/wiki/Nested_set_model
18
19 -}
20
21 {-# LANGUAGE Arrows #-}
22 {-# LANGUAGE FlexibleInstances #-}
23 {-# LANGUAGE FunctionalDependencies #-}
24 {-# LANGUAGE MultiParamTypeClasses #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE OverloadedStrings #-}
27 {-# LANGUAGE QuasiQuotes #-}
28 {-# LANGUAGE RankNTypes #-}
29 {-# LANGUAGE TemplateHaskell #-}
30 {-# OPTIONS_GHC -fno-warn-orphans #-}
31
32 module Gargantext.Database.Schema.NodeNgramsNgrams
33 where
34
35 import Control.Lens (view)
36 import Control.Lens.TH (makeLensesWith, abbreviatedFields)
37 import Control.Monad.IO.Class (liftIO)
38 import Data.Text (Text)
39 import Data.Maybe (Maybe)
40 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
41 import Database.PostgreSQL.Simple.SqlQQ (sql)
42 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
43 import Gargantext.Database.Utils (Cmd, runOpaQuery, runPGSQuery, connection)
44 import Gargantext.Core.Types.Main (ListId)
45 import Gargantext.Prelude
46 import Opaleye
47 import qualified Database.PostgreSQL.Simple as PGS
48
49 data NodeNgramsNgramsPoly node_id ngram1_id ngram2_id weight =
50 NodeNgramsNgrams { _nng_NodeId :: node_id
51 , _nng_Ngram1Id :: ngram1_id
52 , _nng_Ngram2Id :: ngram2_id
53 , _nng_Weight :: weight
54 } deriving (Show)
55
56 type NodeNgramsNgramsWrite =
57 NodeNgramsNgramsPoly (Column PGInt4 )
58 (Column PGInt4 )
59 (Column PGInt4 )
60 (Maybe (Column PGFloat8))
61
62 type NodeNgramsNgramsRead =
63 NodeNgramsNgramsPoly (Column PGInt4 )
64 (Column PGInt4 )
65 (Column PGInt4 )
66 (Column PGFloat8)
67
68 type NodeNgramsNgrams =
69 NodeNgramsNgramsPoly Int
70 Int
71 Int
72 (Maybe Double)
73
74 $(makeAdaptorAndInstance "pNodeNgramsNgrams"
75 ''NodeNgramsNgramsPoly)
76 $(makeLensesWith abbreviatedFields
77 ''NodeNgramsNgramsPoly)
78
79
80 nodeNgramsNgramsTable :: Table NodeNgramsNgramsWrite NodeNgramsNgramsRead
81 nodeNgramsNgramsTable =
82 Table "nodes_ngrams_ngrams"
83 ( pNodeNgramsNgrams NodeNgramsNgrams
84 { _nng_NodeId = required "node_id"
85 , _nng_Ngram1Id = required "ngram1_id"
86 , _nng_Ngram2Id = required "ngram2_id"
87 , _nng_Weight = optional "weight"
88 }
89 )
90
91 queryNodeNgramsNgramsTable :: Query NodeNgramsNgramsRead
92 queryNodeNgramsNgramsTable = queryTable nodeNgramsNgramsTable
93
94 -- | Select NodeNgramsNgrams
95 -- TODO not optimized (get all ngrams without filters)
96 nodeNgramsNgrams :: Cmd err [NodeNgramsNgrams]
97 nodeNgramsNgrams = runOpaQuery queryNodeNgramsNgramsTable
98
99 instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
100 queryRunnerColumnDefault = fieldQueryRunnerColumn
101
102 instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
103 queryRunnerColumnDefault = fieldQueryRunnerColumn
104
105
106 -- TODO: Add option on conflict
107 insertNodeNgramsNgramsNew :: [NodeNgramsNgrams] -> Cmd err Int
108 insertNodeNgramsNgramsNew = insertNodeNgramsNgramsW
109 . map (\(NodeNgramsNgrams n ng1 ng2 maybeWeight) ->
110 NodeNgramsNgrams (pgInt4 n )
111 (pgInt4 ng1)
112 (pgInt4 ng2)
113 (pgDouble <$> maybeWeight)
114 )
115
116 insertNodeNgramsNgramsW :: [NodeNgramsNgramsWrite] -> Cmd err Int
117 insertNodeNgramsNgramsW ns = do
118 c <- view connection
119 liftIO $ fromIntegral <$> runInsertMany c nodeNgramsNgramsTable ns
120
121 ------------------------------------------------------------------------
122 data Action = Del | Add
123
124 type NgramsParent = Text
125 type NgramsChild = Text
126
127
128 ngramsGroup :: Action -> [(ListId, NgramsParent, NgramsChild, Maybe Double)]
129 -> Cmd err [Int]
130 ngramsGroup _ [] = pure []
131 ngramsGroup action ngs = runNodeNgramsNgrams q ngs
132 where
133 q = case action of
134 Del -> queryDelNodeNgramsNgrams
135 Add -> queryInsertNodeNgramsNgrams
136
137
138 runNodeNgramsNgrams :: PGS.Query -> [(ListId, NgramsParent, NgramsChild{-, Maybe Double-})] -> Cmd err [Int]
139 runNodeNgramsNgrams q ngs = map (\(PGS.Only a) -> a) <$> runPGSQuery q (PGS.Only $ Values fields ngs' )
140 where
141 ngs' = map (\(n,ng1,ng2{-,w-}) -> (n,ng1,ng2{-,maybe 0 identity w-})) ngs
142 fields = map (\t -> QualifiedIdentifier Nothing t)
143 ["int4","text","text"{-,"double precision"-}]
144
145 --------------------------------------------------------------------
146 -- TODO: on conflict update weight
147 queryInsertNodeNgramsNgrams :: PGS.Query
148 queryInsertNodeNgramsNgrams = [sql|
149 WITH input_rows(nId,ng1,ng2
150 -- ,w
151 ) AS (?)
152 INSERT INTO nodes_ngrams_ngrams (node_id,ngram1_id,ngram2_id,weight)
153 SELECT nId,ngrams1.id,ngrams2.id
154 -- ,w
155 FROM input_rows
156 JOIN ngrams ngrams1 ON ngrams1.terms = ng1
157 JOIN ngrams ngrams2 ON ngrams2.terms = ng2
158 ON CONFLICT (node_id,ngram1_id,ngram2_id) DO NOTHING -- unique index created here
159 |]
160
161 queryDelNodeNgramsNgrams :: PGS.Query
162 queryDelNodeNgramsNgrams = [sql|
163 WITH input(nId,ng1,ng2,w) AS (?)
164 DELETE FROM nodes_ngrams_ngrams nnn
165 JOIN ngrams ngrams1 ON ngrams.terms = ng1
166 JOIN ngrams ngrams2 ON ngrams.terms = ng2
167 WHERE nnn.node_id = input.nId
168 AND nnn.ngram1_id = ngrams1.id
169 AND nnn.ngram2_id = ngrams2.id
170 ;)
171 |]
172