]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/NodeNgramsNgrams.hs
[FLOW][Database] ngrams extraction and insertion.
[gargantext.git] / src / Gargantext / Database / Schema / NodeNgramsNgrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.NodeNgramsNgrams
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 NodeNgramsNgrams table is used to group Ngrams
11 - NodeId :: List Id
12 - NgramId_1, NgramId_2 where all NgramId_2 will be added to NgramId_1
13 - weight: probability of the relation (TODO, fixed to 1 for simple stemming)
14
15 Next Step benchmark:
16 - recursive queries of postgres
17 - group with: https://en.wikipedia.org/wiki/Nested_set_model
18
19 -}
20
21 {-# LANGUAGE Arrows #-}
22 {-# LANGUAGE FlexibleInstances #-}
23 {-# LANGUAGE FunctionalDependencies #-}
24 {-# LANGUAGE MultiParamTypeClasses #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE OverloadedStrings #-}
27 {-# LANGUAGE QuasiQuotes #-}
28 {-# LANGUAGE TemplateHaskell #-}
29 {-# OPTIONS_GHC -fno-warn-orphans #-}
30
31 module Gargantext.Database.Schema.NodeNgramsNgrams
32 where
33
34 import Control.Lens.TH (makeLensesWith, abbreviatedFields)
35 import Data.Text (Text)
36 import Data.Maybe (Maybe)
37 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
38 import Database.PostgreSQL.Simple.SqlQQ (sql)
39 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
40 import Gargantext.Database.Utils (mkCmd, Cmd(..))
41 import Gargantext.Prelude
42 import Opaleye
43 import qualified Database.PostgreSQL.Simple as DPS
44
45 data NodeNgramsNgramsPoly node_id ngram1_id ngram2_id weight =
46 NodeNgramsNgrams { _nng_NodeId :: node_id
47 , _nng_Ngram1Id :: ngram1_id
48 , _nng_Ngram2Id :: ngram2_id
49 , _nng_Weight :: weight
50 } deriving (Show)
51
52
53 type NodeNgramsNgramsWrite =
54 NodeNgramsNgramsPoly (Column PGInt4 )
55 (Column PGInt4 )
56 (Column PGInt4 )
57 (Maybe (Column PGFloat8))
58
59 type NodeNgramsNgramsRead =
60 NodeNgramsNgramsPoly (Column PGInt4 )
61 (Column PGInt4 )
62 (Column PGInt4 )
63 (Column PGFloat8)
64
65 type NodeNgramsNgrams =
66 NodeNgramsNgramsPoly Int
67 Int
68 Int
69 (Maybe Double)
70
71 $(makeAdaptorAndInstance "pNodeNgramsNgrams"
72 ''NodeNgramsNgramsPoly)
73 $(makeLensesWith abbreviatedFields
74 ''NodeNgramsNgramsPoly)
75
76
77 nodeNgramsNgramsTable :: Table NodeNgramsNgramsWrite NodeNgramsNgramsRead
78 nodeNgramsNgramsTable =
79 Table "nodes_ngrams_ngrams"
80 ( pNodeNgramsNgrams NodeNgramsNgrams
81 { _nng_NodeId = required "node_id"
82 , _nng_Ngram1Id = required "ngram1_id"
83 , _nng_Ngram2Id = required "ngram2_id"
84 , _nng_Weight = optional "weight"
85 }
86 )
87
88 queryNodeNgramsNgramsTable :: Query NodeNgramsNgramsRead
89 queryNodeNgramsNgramsTable = queryTable nodeNgramsNgramsTable
90
91 -- | Select NodeNgramsNgrams
92 -- TODO not optimized (get all ngrams without filters)
93 nodeNgramsNgrams :: DPS.Connection -> IO [NodeNgramsNgrams]
94 nodeNgramsNgrams conn = runQuery conn queryNodeNgramsNgramsTable
95
96 instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
97 queryRunnerColumnDefault = fieldQueryRunnerColumn
98
99 instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
100 queryRunnerColumnDefault = fieldQueryRunnerColumn
101
102
103 -- TODO: Add option on conflict
104 insertNodeNgramsNgramsNew :: [NodeNgramsNgrams] -> Cmd Int
105 insertNodeNgramsNgramsNew = insertNodeNgramsNgramsW
106 . map (\(NodeNgramsNgrams n ng1 ng2 maybeWeight) ->
107 NodeNgramsNgrams (pgInt4 n )
108 (pgInt4 ng1)
109 (pgInt4 ng2)
110 (pgDouble <$> maybeWeight)
111 )
112
113 insertNodeNgramsNgramsW :: [NodeNgramsNgramsWrite] -> Cmd Int
114 insertNodeNgramsNgramsW ns =
115 mkCmd $ \c -> fromIntegral
116 <$> runInsertMany c nodeNgramsNgramsTable ns
117
118 ------------------------------------------------------------------------
119 data Action = Del | Add
120
121 type NgramsParent = Text
122 type NgramsChild = Text
123
124 ngramsGroup :: Action -> [(Int, NgramsParent, NgramsChild, Maybe Double)] -> Cmd [Int]
125 ngramsGroup a ngs = mkCmd $ \c -> ngramsGroup' c a ngs
126
127 -- TODO: remove this function (use Reader Monad only)
128 ngramsGroup' :: DPS.Connection -> Action -> [(Int, NgramsParent, NgramsChild, Maybe Double)] -> IO [Int]
129 ngramsGroup' c action ngs = runNodeNgramsNgrams c q ngs
130 where
131 q = case action of
132 Del -> queryDelNodeNgramsNgrams
133 Add -> queryInsertNodeNgramsNgrams
134
135
136 runNodeNgramsNgrams :: DPS.Connection -> DPS.Query -> [(Int, NgramsParent, NgramsChild, Maybe Double)] -> IO [Int]
137 runNodeNgramsNgrams c q ngs = map (\(DPS.Only a) -> a) <$> DPS.query c q (DPS.Only $ Values fields ngs' )
138 where
139 ngs' = map (\(n,ng1,ng2,w) -> (n,ng1,ng2,maybe 0 identity w)) ngs
140 fields = map (\t -> QualifiedIdentifier Nothing t)
141 ["int4","text","text","double"]
142
143 --------------------------------------------------------------------
144 -- TODO: on conflict update weight
145 queryInsertNodeNgramsNgrams :: DPS.Query
146 queryInsertNodeNgramsNgrams = [sql|
147 WITH input_rows(nId,ng1,ng2,w) AS (?)
148 INSERT INTO nodes_ngrams_ngrams (node_id,ngram1_id,ngram2_id,weight)
149 SELECT nId,ngrams1.id,ngrams2.id,w FROM input_rows
150 JOIN ngrams ngrams1 ON ngrams1.terms = ng1
151 JOIN ngrams ngrams2 ON ngrams2.terms = ng2
152 ON CONFLICT (node_id,ngram1_id,ngram2_id) DO NOTHING -- unique index created here
153 |]
154
155 queryDelNodeNgramsNgrams :: DPS.Query
156 queryDelNodeNgramsNgrams = [sql|
157 WITH input(nId,ng1,ng2,w) AS (?)
158 DELETE FROM nodes_ngrams_ngrams nnn
159 JOIN ngrams ngrams1 ON ngrams.terms = ng1
160 JOIN ngrams ngrams2 ON ngrams.terms = ng2
161 WHERE nnn.node_id = input.nId
162 AND nnn.ngram1_id = ngrams1.id
163 AND nnn.ngram2_id = ngrams2.id
164 ;)
165 |]
166