2 Module : Gargantext.Database.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE MultiParamTypeClasses #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE QuasiQuotes #-}
22 {-# LANGUAGE TemplateHaskell #-}
24 module Gargantext.Database.Ngrams where
27 import Control.Lens (makeLenses)
28 import Data.ByteString.Internal (ByteString)
29 import Data.Map (Map, fromList, lookup)
30 import Data.Text (Text)
31 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
32 import Database.PostgreSQL.Simple.SqlQQ (sql)
33 import Database.PostgreSQL.Simple.ToField (toField)
34 import Database.PostgreSQL.Simple.ToRow (toRow)
35 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
36 import GHC.Generics (Generic)
37 import Gargantext.Database.Node (mkCmd, Cmd(..))
38 import Gargantext.Prelude
39 import qualified Database.PostgreSQL.Simple as DPS
42 --data NgramPoly id terms n = NgramDb { ngram_id :: id
43 -- , ngram_terms :: terms
47 --type NgramWrite = NgramPoly (Maybe (Column PGInt4))
51 --type NgramRead = NgramPoly (Column PGInt4)
55 ----type Ngram = NgramPoly Int Text Int
57 -- $(makeAdaptorAndInstance "pNgram" ''NgramPoly)
58 -- $(makeLensesWith abbreviatedFields ''NgramPoly)
60 --ngramTable :: Table NgramWrite NgramRead
61 --ngramTable = Table "ngrams" (pNgram NgramDb { ngram_id = optional "id"
62 -- , ngram_terms = required "terms"
63 -- , ngram_n = required "n"
67 --queryNgramTable :: Query NgramRead
68 --queryNgramTable = queryTable ngramTable
70 --dbGetNgrams :: PGS.Connection -> IO [NgramDb]
71 --dbGetNgrams conn = runQuery conn queryNgramTable
73 -- | Main Ngrams Types
75 -- Typed Ngrams localize the context of the ngrams
76 -- ngrams in source field of document has Sources Type
77 -- ngrams in authors field of document has Authors Type
78 -- ngrams in text (title or abstract) of documents has Terms Type
79 data NgramsType = Sources | Authors | Terms
81 ngramsTypeId :: NgramsType -> Int
82 ngramsTypeId Terms = 1
83 ngramsTypeId Authors = 2
84 ngramsTypeId Sources = 3
86 type NgramsTerms = Text
90 ------------------------------------------------------------------------
91 -- | TODO put it in Gargantext.Text.Ngrams
92 data Ngrams = Ngrams { _ngramsTerms :: Text
95 instance Eq Ngrams where
97 instance Ord Ngrams where
100 instance DPS.ToRow Ngrams where
101 toRow (Ngrams t s) = [toField t, toField s]
103 -------------------------------------------------------------------------
104 -- | TODO put it in Gargantext.Text.Ngrams
105 -- Named entity are typed ngrams of Terms Ngrams
107 NgramsT { _ngramsType :: NgramsType
111 instance Eq (NgramsT a) where (==) = (==)
112 instance Ord (NgramsT a) where compare = compare
114 -----------------------------------------------------------------------
118 , _ngramsId :: NgramsId
121 instance Eq NgramsIndexed where
123 instance Ord NgramsIndexed where
125 makeLenses ''NgramsIndexed
127 ------------------------------------------------------------------------
132 } deriving (Show, Generic)
134 instance DPS.FromRow NgramIds where
135 fromRow = NgramIds <$> field <*> field
137 ----------------------
138 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
139 indexNgramsT m ngrId = indexNgramsTWith f ngrId
141 f n = maybe (panic "indexNgramsT: should not happen") identity (lookup n m)
143 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams-> NgramsT NgramsIndexed
144 indexNgramsTWith f (NgramsT t n) = NgramsT t (NgramsIndexed n ((f . _ngramsTerms) n))
146 insertNgrams :: [Ngrams] -> Cmd (Map NgramsTerms NgramsId)
147 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
149 insertNgrams' :: [Ngrams] -> Cmd [NgramIds]
150 insertNgrams' ns = mkCmd $ \conn -> DPS.query conn queryInsertNgrams (DPS.Only $ Values fields ns)
152 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
155 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd ByteString
156 insertNgrams_Debug ns = mkCmd $ \conn -> DPS.formatQuery conn queryInsertNgrams (DPS.Only $ Values fields ns)
158 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
160 ----------------------
161 queryInsertNgrams :: DPS.Query
162 queryInsertNgrams = [sql|
163 WITH input_rows(terms,n) AS (?)
165 INSERT INTO ngrams (terms,n)
166 SELECT * FROM input_rows
167 ON CONFLICT (terms) DO NOTHING -- unique index created here
176 JOIN ngrams c USING (terms); -- columns of unique index