2 Module : Gargantext.Database.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE MultiParamTypeClasses #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE QuasiQuotes #-}
22 {-# LANGUAGE TemplateHaskell #-}
24 module Gargantext.Database.Ngrams where
27 import Control.Lens (makeLenses)
28 import Data.ByteString.Internal (ByteString)
29 import Data.Map (Map, fromList, lookup)
30 import Data.Text (Text, splitOn)
31 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
32 import Database.PostgreSQL.Simple.SqlQQ (sql)
33 import Database.PostgreSQL.Simple.ToField (toField)
34 import Database.PostgreSQL.Simple.ToRow (toRow)
35 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
36 import GHC.Generics (Generic)
37 import Gargantext.Database.Node (mkCmd, Cmd(..))
38 import Gargantext.Prelude
39 import qualified Database.PostgreSQL.Simple as DPS
42 --data NgramPoly id terms n = NgramDb { ngram_id :: id
43 -- , ngram_terms :: terms
47 --type NgramWrite = NgramPoly (Maybe (Column PGInt4))
51 --type NgramRead = NgramPoly (Column PGInt4)
55 ----type Ngram = NgramPoly Int Text Int
57 -- $(makeAdaptorAndInstance "pNgram" ''NgramPoly)
58 -- $(makeLensesWith abbreviatedFields ''NgramPoly)
60 --ngramTable :: Table NgramWrite NgramRead
61 --ngramTable = Table "ngrams" (pNgram NgramDb { ngram_id = optional "id"
62 -- , ngram_terms = required "terms"
63 -- , ngram_n = required "n"
67 --queryNgramTable :: Query NgramRead
68 --queryNgramTable = queryTable ngramTable
70 --dbGetNgrams :: PGS.Connection -> IO [NgramDb]
71 --dbGetNgrams conn = runQuery conn queryNgramTable
73 -- | Main Ngrams Types
75 -- Typed Ngrams localize the context of the ngrams
76 -- ngrams in source field of document has Sources Type
77 -- ngrams in authors field of document has Authors Type
78 -- ngrams in text (title or abstract) of documents has Terms Type
79 data NgramsType = Sources | Authors | Terms
82 ngramsTypeId :: NgramsType -> Int
83 ngramsTypeId Terms = 1
84 ngramsTypeId Authors = 2
85 ngramsTypeId Sources = 3
87 type NgramsTerms = Text
91 ------------------------------------------------------------------------
92 -- | TODO put it in Gargantext.Text.Ngrams
93 data Ngrams = Ngrams { _ngramsTerms :: Text
96 instance Eq Ngrams where
98 instance Ord Ngrams where
101 instance DPS.ToRow Ngrams where
102 toRow (Ngrams t s) = [toField t, toField s]
104 text2ngrams :: Text -> Ngrams
105 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
107 -------------------------------------------------------------------------
108 -- | TODO put it in Gargantext.Text.Ngrams
109 -- Named entity are typed ngrams of Terms Ngrams
111 NgramsT { _ngramsType :: NgramsType
115 instance Eq (NgramsT a)
121 instance Ord (NgramsT a) where compare = compare
125 -----------------------------------------------------------------------
129 , _ngramsId :: NgramsId
132 instance Eq NgramsIndexed where
134 instance Ord NgramsIndexed where
136 makeLenses ''NgramsIndexed
138 ------------------------------------------------------------------------
143 } deriving (Show, Generic)
145 instance DPS.FromRow NgramIds where
146 fromRow = NgramIds <$> field <*> field
148 ----------------------
149 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
150 indexNgramsT m ngrId = indexNgramsTWith f ngrId
152 f n = maybe (panic "indexNgramsT: should not happen") identity (lookup n m)
154 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams-> NgramsT NgramsIndexed
155 indexNgramsTWith f (NgramsT t n) = NgramsT t (NgramsIndexed n ((f . _ngramsTerms) n))
157 insertNgrams :: [Ngrams] -> Cmd (Map NgramsTerms NgramsId)
158 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
160 insertNgrams' :: [Ngrams] -> Cmd [NgramIds]
161 insertNgrams' ns = mkCmd $ \conn -> DPS.query conn queryInsertNgrams (DPS.Only $ Values fields ns)
163 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
166 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd ByteString
167 insertNgrams_Debug ns = mkCmd $ \conn -> DPS.formatQuery conn queryInsertNgrams (DPS.Only $ Values fields ns)
169 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
171 ----------------------
172 queryInsertNgrams :: DPS.Query
173 queryInsertNgrams = [sql|
174 WITH input_rows(terms,n) AS (?)
176 INSERT INTO ngrams (terms,n)
177 SELECT * FROM input_rows
178 ON CONFLICT (terms) DO NOTHING -- unique index created here
187 JOIN ngrams c USING (terms); -- columns of unique index