]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Ngrams.hs
[FIX] group result of SQL result.
[gargantext.git] / src / Gargantext / Database / Ngrams.hs
1 {-|
2 Module : Gargantext.Database.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE MultiParamTypeClasses #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE QuasiQuotes #-}
22 {-# LANGUAGE TemplateHaskell #-}
23
24 module Gargantext.Database.Ngrams where
25
26 -- import Opaleye
27 import Control.Lens (makeLenses)
28 import Data.ByteString.Internal (ByteString)
29 import Data.Map (Map, fromList, lookup)
30 import Data.Text (Text, splitOn)
31 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
32 import Database.PostgreSQL.Simple.SqlQQ (sql)
33 import Database.PostgreSQL.Simple.ToField (toField)
34 import Database.PostgreSQL.Simple.ToRow (toRow)
35 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
36 import GHC.Generics (Generic)
37 import Gargantext.Database.Config (nodeTypeId)
38 import Gargantext.Database.Types.Node (NodeType)
39 import Gargantext.Database.Node (mkCmd, Cmd(..))
40 import Gargantext.Prelude
41 import qualified Database.PostgreSQL.Simple as DPS
42
43
44 --data NgramPoly id terms n = NgramDb { ngram_id :: id
45 -- , ngram_terms :: terms
46 -- , ngram_n :: n
47 -- } deriving (Show)
48 --
49 --type NgramWrite = NgramPoly (Maybe (Column PGInt4))
50 -- (Column PGText)
51 -- (Column PGInt4)
52 --
53 --type NgramRead = NgramPoly (Column PGInt4)
54 -- (Column PGText)
55 -- (Column PGInt4)
56 --
57 ----type Ngram = NgramPoly Int Text Int
58 --
59 -- $(makeAdaptorAndInstance "pNgram" ''NgramPoly)
60 -- $(makeLensesWith abbreviatedFields ''NgramPoly)
61 --
62 --ngramTable :: Table NgramWrite NgramRead
63 --ngramTable = Table "ngrams" (pNgram NgramDb { ngram_id = optional "id"
64 -- , ngram_terms = required "terms"
65 -- , ngram_n = required "n"
66 -- }
67 -- )
68 --
69 --queryNgramTable :: Query NgramRead
70 --queryNgramTable = queryTable ngramTable
71 --
72 --dbGetNgrams :: PGS.Connection -> IO [NgramDb]
73 --dbGetNgrams conn = runQuery conn queryNgramTable
74
75 -- | Main Ngrams Types
76 -- | Typed Ngrams
77 -- Typed Ngrams localize the context of the ngrams
78 -- ngrams in source field of document has Sources Type
79 -- ngrams in authors field of document has Authors Type
80 -- ngrams in text (title or abstract) of documents has Terms Type
81 data NgramsType = Authors | Institutes | Sources | Terms
82 deriving (Eq, Show, Ord)
83
84 ngramsTypeId :: NgramsType -> Int
85 ngramsTypeId Authors = 1
86 ngramsTypeId Institutes = 2
87 ngramsTypeId Sources = 3
88 ngramsTypeId Terms = 4
89
90 type NgramsTerms = Text
91 type NgramsId = Int
92 type Size = Int
93
94 ------------------------------------------------------------------------
95 -- | TODO put it in Gargantext.Text.Ngrams
96 data Ngrams = Ngrams { _ngramsTerms :: Text
97 , _ngramsSize :: Int
98 } deriving (Generic, Show, Eq, Ord)
99
100 makeLenses ''Ngrams
101 instance DPS.ToRow Ngrams where
102 toRow (Ngrams t s) = [toField t, toField s]
103
104 text2ngrams :: Text -> Ngrams
105 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
106
107 -------------------------------------------------------------------------
108 -- | TODO put it in Gargantext.Text.Ngrams
109 -- Named entity are typed ngrams of Terms Ngrams
110 data NgramsT a =
111 NgramsT { _ngramsType :: NgramsType
112 , _ngramsT :: a
113 } deriving (Generic, Show, Eq, Ord)
114
115 makeLenses ''NgramsT
116 -----------------------------------------------------------------------
117 data NgramsIndexed =
118 NgramsIndexed
119 { _ngrams :: Ngrams
120 , _ngramsId :: NgramsId
121 } deriving (Show, Generic, Eq, Ord)
122
123 makeLenses ''NgramsIndexed
124 ------------------------------------------------------------------------
125 data NgramIds =
126 NgramIds
127 { ngramId :: Int
128 , ngramTerms :: Text
129 } deriving (Show, Generic, Eq, Ord)
130
131 instance DPS.FromRow NgramIds where
132 fromRow = NgramIds <$> field <*> field
133
134 ----------------------
135 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
136 indexNgramsT m ngrId = indexNgramsTWith f ngrId
137 where
138 f n = maybe (panic "indexNgramsT: should not happen") identity (lookup n m)
139
140 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams-> NgramsT NgramsIndexed
141 indexNgramsTWith f (NgramsT t n) = NgramsT t (NgramsIndexed n ((f . _ngramsTerms) n))
142
143 insertNgrams :: [Ngrams] -> Cmd (Map NgramsTerms NgramsId)
144 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
145
146 insertNgrams' :: [Ngrams] -> Cmd [NgramIds]
147 insertNgrams' ns = mkCmd $ \conn -> DPS.query conn queryInsertNgrams (DPS.Only $ Values fields ns)
148 where
149 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
150
151 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd ByteString
152 insertNgrams_Debug ns = mkCmd $ \conn -> DPS.formatQuery conn queryInsertNgrams (DPS.Only $ Values fields ns)
153 where
154 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
155
156 ----------------------
157 queryInsertNgrams :: DPS.Query
158 queryInsertNgrams = [sql|
159 WITH input_rows(terms,n) AS (?)
160 , ins AS (
161 INSERT INTO ngrams (terms,n)
162 SELECT * FROM input_rows
163 ON CONFLICT (terms) DO NOTHING -- unique index created here
164 RETURNING id,terms
165 )
166
167 SELECT id, terms
168 FROM ins
169 UNION ALL
170 SELECT c.id, terms
171 FROM input_rows
172 JOIN ngrams c USING (terms); -- columns of unique index
173 |]
174
175
176
177
178 -- | Ngrams Table
179
180 data NgramsTableParam =
181 NgramsTableParam { _nt_listId :: Int
182 , _nt_corpusId :: Int
183 }
184
185 type NgramsTableParamUser = NgramsTableParam
186 type NgramsTableParamMaster = NgramsTableParam
187
188 data NgramsTableData = NgramsTableData { _ntd_terms :: Text
189 , _ntd_n :: Int
190 , _ntd_ngramsType :: Int
191 , _ntd_weight :: Double
192 } deriving (Show)
193
194 getTableNgrams :: NodeType -> NgramsType -> NgramsTableParamUser -> NgramsTableParamMaster -> Cmd [(Text, Int, Int, Double)]
195 getTableNgrams nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc) =
196 mkCmd $ \conn -> DPS.query conn querySelectTableNgrams (ul,uc,nodeTId,ngrmTId,ml,mc,nodeTId,ngrmTId)
197 where
198 nodeTId = nodeTypeId nodeT
199 ngrmTId = ngramsTypeId ngrmT
200
201
202
203 querySelectTableNgrams :: DPS.Query
204 querySelectTableNgrams = [sql|
205
206 WITH tableUser AS (select ngs.terms, ngs.n, nn1.ngrams_type,nn2.weight FROM ngrams ngs
207 JOIN nodes_ngrams nn1 ON nn1.ngram_id = ngs.id
208 JOIN nodes_ngrams nn2 ON nn2.ngram_id = ngs.id
209 JOIN nodes n ON n.id = nn2.node_id
210 WHERE nn1.node_id = ? -- User listId
211 AND n.parent_id = ? -- User CorpusId or AnnuaireId
212 AND n.typename = ? -- both type of childs (Documents or Contacts)
213 AND nn2.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
214 ), tableMaster AS (select ngs.terms, ngs.n, nn1.ngrams_type,nn2.weight FROM ngrams ngs
215 JOIN nodes_ngrams nn1 ON nn1.ngram_id = ngs.id
216 JOIN nodes_ngrams nn2 ON nn2.ngram_id = ngs.id
217 JOIN nodes n ON n.id = nn2.node_id
218 WHERE nn1.node_id = ? -- Master listId
219 AND n.parent_id = ? -- Master CorpusId or AnnuaireId
220 AND n.typename = ? -- both type of childs (Documents or Contacts)
221 AND nn2.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
222 )
223
224 SELECT COALESCE(tu.terms,tm.terms) AS terms
225 , COALESCE(tu.n,tm.n) AS n
226 , COALESCE(tu.ngrams_type,tm.ngrams_type) AS ngrams_type
227 , SUM(COALESCE(tu.weight,tm.weight)) AS weight
228 FROM tableUser tu RIGHT JOIN tableMaster tm ON tu.terms = tm.terms
229 GROUP BY tu.terms,tm.terms,tu.n,tm.n,tu.ngrams_type,tm.ngrams_type;
230
231 |]
232
233 type ListIdUser = Int
234 type ListIdMaster = Int
235
236
237 getNgramsGroup :: ListIdUser -> ListIdMaster -> Cmd [(Text, Text)]
238 getNgramsGroup lu lm = mkCmd $ \conn -> DPS.query conn querySelectNgramsGroup (lu,lm)
239
240 querySelectNgramsGroup :: DPS.Query
241 querySelectNgramsGroup = [sql|
242 WITH groupUser AS (
243 SELECT n1.terms AS t1, n2.terms AS t2 FROM nodes_ngrams_ngrams nnn
244 JOIN ngrams n1 ON n1.id = nnn.ngram1_id
245 JOIN ngrams n2 ON n2.id = nnn.ngram2_id
246 WHERE
247 nnn.node_id = ? -- User listId
248 ),
249 groupMaster AS (
250 SELECT n1.terms AS t1, n2.terms AS t2 FROM nodes_ngrams_ngrams nnn
251 JOIN ngrams n1 ON n1.id = nnn.ngram1_id
252 JOIN ngrams n2 ON n2.id = nnn.ngram2_id
253 WHERE
254 nnn.node_id = ? -- Master listId
255 )
256 SELECT COALESCE(gu.t1,gm.t1) AS ngram1_id
257 , COALESCE(gu.t2,gm.t2) AS ngram2_id
258 FROM groupUser gu RIGHT JOIN groupMaster gm ON gu.t1 = gm.t1
259 |]