2 Module : Gargantext.Database.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE MultiParamTypeClasses #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE QuasiQuotes #-}
22 {-# LANGUAGE TemplateHaskell #-}
24 module Gargantext.Database.Ngrams where
27 import Control.Lens (makeLenses)
28 import Data.ByteString.Internal (ByteString)
29 import Data.Map (Map, fromList, lookup)
30 import Data.Text (Text, splitOn)
31 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
32 import Database.PostgreSQL.Simple.SqlQQ (sql)
33 import Database.PostgreSQL.Simple.ToField (toField)
34 import Database.PostgreSQL.Simple.ToRow (toRow)
35 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
36 import GHC.Generics (Generic)
37 import Gargantext.Database.Node (mkCmd, Cmd(..))
38 import Gargantext.Prelude
39 import qualified Database.PostgreSQL.Simple as DPS
42 --data NgramPoly id terms n = NgramDb { ngram_id :: id
43 -- , ngram_terms :: terms
47 --type NgramWrite = NgramPoly (Maybe (Column PGInt4))
51 --type NgramRead = NgramPoly (Column PGInt4)
55 ----type Ngram = NgramPoly Int Text Int
57 -- $(makeAdaptorAndInstance "pNgram" ''NgramPoly)
58 -- $(makeLensesWith abbreviatedFields ''NgramPoly)
60 --ngramTable :: Table NgramWrite NgramRead
61 --ngramTable = Table "ngrams" (pNgram NgramDb { ngram_id = optional "id"
62 -- , ngram_terms = required "terms"
63 -- , ngram_n = required "n"
67 --queryNgramTable :: Query NgramRead
68 --queryNgramTable = queryTable ngramTable
70 --dbGetNgrams :: PGS.Connection -> IO [NgramDb]
71 --dbGetNgrams conn = runQuery conn queryNgramTable
73 -- | Main Ngrams Types
75 -- Typed Ngrams localize the context of the ngrams
76 -- ngrams in source field of document has Sources Type
77 -- ngrams in authors field of document has Authors Type
78 -- ngrams in text (title or abstract) of documents has Terms Type
79 data NgramsType = Authors | Institutes | Sources | Terms
80 deriving (Eq, Show, Ord)
82 ngramsTypeId :: NgramsType -> Int
83 ngramsTypeId Authors = 1
84 ngramsTypeId Institutes = 2
85 ngramsTypeId Sources = 3
86 ngramsTypeId Terms = 4
88 type NgramsTerms = Text
92 ------------------------------------------------------------------------
93 -- | TODO put it in Gargantext.Text.Ngrams
94 data Ngrams = Ngrams { _ngramsTerms :: Text
96 } deriving (Generic, Show, Eq, Ord)
99 instance DPS.ToRow Ngrams where
100 toRow (Ngrams t s) = [toField t, toField s]
102 text2ngrams :: Text -> Ngrams
103 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
105 -------------------------------------------------------------------------
106 -- | TODO put it in Gargantext.Text.Ngrams
107 -- Named entity are typed ngrams of Terms Ngrams
109 NgramsT { _ngramsType :: NgramsType
111 } deriving (Generic, Show, Eq, Ord)
114 -----------------------------------------------------------------------
118 , _ngramsId :: NgramsId
119 } deriving (Show, Generic, Eq, Ord)
121 makeLenses ''NgramsIndexed
122 ------------------------------------------------------------------------
127 } deriving (Show, Generic, Eq, Ord)
129 instance DPS.FromRow NgramIds where
130 fromRow = NgramIds <$> field <*> field
132 ----------------------
133 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
134 indexNgramsT m ngrId = indexNgramsTWith f ngrId
136 f n = maybe (panic "indexNgramsT: should not happen") identity (lookup n m)
138 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams-> NgramsT NgramsIndexed
139 indexNgramsTWith f (NgramsT t n) = NgramsT t (NgramsIndexed n ((f . _ngramsTerms) n))
141 insertNgrams :: [Ngrams] -> Cmd (Map NgramsTerms NgramsId)
142 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
144 insertNgrams' :: [Ngrams] -> Cmd [NgramIds]
145 insertNgrams' ns = mkCmd $ \conn -> DPS.query conn queryInsertNgrams (DPS.Only $ Values fields ns)
147 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
149 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd ByteString
150 insertNgrams_Debug ns = mkCmd $ \conn -> DPS.formatQuery conn queryInsertNgrams (DPS.Only $ Values fields ns)
152 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
154 ----------------------
155 queryInsertNgrams :: DPS.Query
156 queryInsertNgrams = [sql|
157 WITH input_rows(terms,n) AS (?)
159 INSERT INTO ngrams (terms,n)
160 SELECT * FROM input_rows
161 ON CONFLICT (terms) DO NOTHING -- unique index created here
170 JOIN ngrams c USING (terms); -- columns of unique index
178 data NgramsTableParam =
179 NgramsTableParam { _nt_listId :: Int
180 , _nt_corpusId :: Int
181 , _nt_typeNode :: Int
182 , _nt_typeNgrams :: Int
185 type NgramsTableParamUser = NgramsTableParam
186 type NgramsTableParamMaster = NgramsTableParam
188 data NgramsTableData = NgramsTableData { _ntd_terms :: Text
190 , _ntd_ngramsType :: Int
191 , _ntd_weight :: Double
194 getTableNgrams :: NgramsTableParamUser -> NgramsTableParamMaster -> Cmd [(Text, Int, Int, Double)]
195 getTableNgrams (NgramsTableParam ul uc utn utg) (NgramsTableParam ml mc mtn mtg) =
196 mkCmd $ \conn -> DPS.query conn querySelectTableNgrams (ul,uc,utn,utg,ml,mc,mtn,mtg)
199 querySelectTableNgrams :: DPS.Query
200 querySelectTableNgrams = [sql|
202 WITH tableUser AS (select ngs.terms, ngs.n, nn1.ngrams_type,nn2.weight FROM ngrams ngs
203 JOIN nodes_ngrams nn1 ON nn1.ngram_id = ngs.id
204 JOIN nodes_ngrams nn2 ON nn2.ngram_id = ngs.id
205 JOIN nodes n ON n.id = nn2.node_id
206 WHERE nn1.node_id = ? -- User listId
207 AND n.parent_id = ? -- User CorpusId or AnnuaireId
208 AND n.typename = ? -- both type of childs (Documents or Contacts)
209 AND nn2.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
210 ), tableMaster AS (select ngs.terms, ngs.n, nn1.ngrams_type,nn2.weight FROM ngrams ngs
211 JOIN nodes_ngrams nn1 ON nn1.ngram_id = ngs.id
212 JOIN nodes_ngrams nn2 ON nn2.ngram_id = ngs.id
213 JOIN nodes n ON n.id = nn2.node_id
214 WHERE nn1.node_id = ? -- Master listId
215 AND n.parent_id = ? -- Master CorpusId or AnnuaireId
216 AND n.typename = ? -- both type of childs (Documents or Contacts)
217 AND nn2.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
220 SELECT COALESCE(tu.terms,tm.terms) AS terms
221 , COALESCE(tu.n,tm.n) AS n
222 , COALESCE(tu.ngrams_type,tm.ngrams_type) AS ngrams_type
223 , COALESCE(tu.weight,tm.weight) AS weight
224 FROM tableUser tu RIGHT JOIN tableMaster tm ON tu.terms = tm.terms;
228 type ListIdUser = Int
229 type ListIdMaster = Int
232 getNgramsGroup :: ListIdUser -> ListIdMaster -> Cmd [(Text, Text)]
233 getNgramsGroup lu lm = mkCmd $ \conn -> DPS.query conn querySelectNgramsGroup (lu,lm)
235 querySelectNgramsGroup :: DPS.Query
236 querySelectNgramsGroup = [sql|
238 SELECT n1.terms AS t1, n2.terms AS t2 FROM nodes_ngrams_ngrams nnn
239 JOIN ngrams n1 ON n1.id = nnn.ngram1_id
240 JOIN ngrams n2 ON n2.id = nnn.ngram2_id
242 nnn.node_id = ? -- User listId
245 SELECT n1.terms AS t1, n2.terms AS t2 FROM nodes_ngrams_ngrams nnn
246 JOIN ngrams n1 ON n1.id = nnn.ngram1_id
247 JOIN ngrams n2 ON n2.id = nnn.ngram2_id
249 nnn.node_id = ? -- Master listId
251 SELECT COALESCE(gu.t1,gm.t1) AS ngram1_id
252 , COALESCE(gu.t2,gm.t2) AS ngram2_id
253 FROM groupUser gu RIGHT JOIN groupMaster gm ON gu.t1 = gm.t1