2 Module : Gargantext.Database.Ngram
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE MultiParamTypeClasses #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE QuasiQuotes #-}
22 {-# LANGUAGE TemplateHaskell #-}
24 module Gargantext.Database.Ngram where
27 import Control.Lens (makeLenses)
28 import Control.Lens.TH (makeLensesWith, abbreviatedFields)
29 import Data.ByteString.Internal (ByteString)
30 import Data.List (find)
31 import Data.Map (Map, fromList, lookup)
32 import Data.Maybe (Maybe)
33 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
34 import Data.Text (Text)
35 import Database.PostgreSQL.Simple.FromField ( FromField, fromField)
36 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
37 import Database.PostgreSQL.Simple.SqlQQ (sql)
38 import Database.PostgreSQL.Simple.ToField (toField)
39 import Database.PostgreSQL.Simple.ToRow (toRow)
40 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
41 import GHC.Generics (Generic)
42 import Gargantext.Database.Node (runCmd, mkCmd, Cmd(..))
43 import Gargantext.Prelude
44 import qualified Database.PostgreSQL.Simple as DPS
47 --data NgramPoly id terms n = NgramDb { ngram_id :: id
48 -- , ngram_terms :: terms
52 --type NgramWrite = NgramPoly (Maybe (Column PGInt4))
56 --type NgramRead = NgramPoly (Column PGInt4)
60 ----type Ngram = NgramPoly Int Text Int
62 -- $(makeAdaptorAndInstance "pNgram" ''NgramPoly)
63 -- $(makeLensesWith abbreviatedFields ''NgramPoly)
65 --ngramTable :: Table NgramWrite NgramRead
66 --ngramTable = Table "ngrams" (pNgram NgramDb { ngram_id = optional "id"
67 -- , ngram_terms = required "terms"
68 -- , ngram_n = required "n"
72 --queryNgramTable :: Query NgramRead
73 --queryNgramTable = queryTable ngramTable
75 --dbGetNgrams :: PGS.Connection -> IO [NgramDb]
76 --dbGetNgrams conn = runQuery conn queryNgramTable
78 -- | Main Ngrams Types
80 -- Typed Ngrams localize the context of the ngrams
81 -- ngrams in source field of document has Sources Type
82 -- ngrams in authors field of document has Authors Type
83 -- ngrams in text (title or abstract) of documents has Terms Type
84 data NgramsType = Sources | Authors | Terms
86 ngramsTypeId :: NgramsType -> Int
87 ngramsTypeId Terms = 1
88 ngramsTypeId Authors = 2
89 ngramsTypeId Sources = 3
91 type NgramsTerms = Text
95 ------------------------------------------------------------------------
96 -- | TODO put it in Gargantext.Text.Ngrams
97 data Ngrams = Ngrams { _ngramsTerms :: Text
100 instance Eq Ngrams where
102 instance Ord Ngrams where
105 instance DPS.ToRow Ngrams where
106 toRow (Ngrams t s) = [toField t, toField s]
108 -------------------------------------------------------------------------
109 -- | TODO put it in Gargantext.Text.Ngrams
110 -- Named entity are typed ngrams of Terms Ngrams
111 data NgramsT a = NgramsT { _ngramsType :: NgramsType
114 instance Eq (NgramsT a) where (==) = (==)
115 instance Ord (NgramsT a) where compare = compare
117 -----------------------------------------------------------------------
118 data NgramsIndexed = NgramsIndexed { _ngrams :: Ngrams
119 , _ngramsId :: NgramsId
121 instance Eq NgramsIndexed where
123 instance Ord NgramsIndexed where
125 makeLenses ''NgramsIndexed
126 ------------------------------------------------------------------------
127 data NgramIds = NgramIds { ngramId :: Int
129 } deriving (Show, Generic)
131 instance DPS.FromRow NgramIds where
132 fromRow = NgramIds <$> field <*> field
134 ----------------------
135 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
136 indexNgramsT m n = indexNgramsTWith f n
138 f n = maybe (panic "indexNgramsT: should not happen") identity (lookup n m)
140 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams-> NgramsT NgramsIndexed
141 indexNgramsTWith f (NgramsT t n) = NgramsT t (NgramsIndexed n ((f . _ngramsTerms) n))
142 ----------------------
143 insertNgrams :: [Ngrams] -> Cmd (Map NgramsTerms NgramsId)
144 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
146 insertNgrams' :: [Ngrams] -> Cmd [NgramIds]
147 insertNgrams' ns = mkCmd $ \conn -> DPS.query conn queryInsertNgrams (DPS.Only $ Values fields ns)
149 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
152 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd ByteString
153 insertNgrams_Debug ns = mkCmd $ \conn -> DPS.formatQuery conn queryInsertNgrams (DPS.Only $ Values fields ns)
155 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
157 ----------------------
158 queryInsertNgrams :: DPS.Query
159 queryInsertNgrams = [sql|
160 WITH input_rows(terms,n) AS (?)
162 INSERT INTO ngrams (terms,n)
163 SELECT * FROM input_rows
164 ON CONFLICT (terms) DO NOTHING -- unique index created here
173 JOIN ngrams c USING (terms); -- columns of unique index