2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
19 {-# LANGUAGE MultiParamTypeClasses #-}
20 {-# LANGUAGE NoImplicitPrelude #-}
21 {-# LANGUAGE OverloadedStrings #-}
22 {-# LANGUAGE QuasiQuotes #-}
23 {-# LANGUAGE RankNTypes #-}
24 {-# LANGUAGE TemplateHaskell #-}
26 module Gargantext.Database.Schema.Ngrams where
28 import Control.Lens (makeLenses, over)
29 import Control.Monad (mzero)
31 import Data.Aeson.Types (toJSONKeyText)
32 import Data.ByteString.Internal (ByteString)
33 import Data.Map (Map, fromList, lookup)
34 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
35 import Data.Text (Text, splitOn, pack)
36 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
37 import Database.PostgreSQL.Simple.SqlQQ (sql)
38 import Database.PostgreSQL.Simple.ToField (toField, ToField)
39 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
40 import Database.PostgreSQL.Simple.ToRow (toRow)
41 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
42 import GHC.Generics (Generic)
43 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, formatPGSQuery)
44 import Gargantext.Core.Types (TODO(..))
45 import Gargantext.Prelude
46 import Opaleye hiding (FromField)
47 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
48 import Text.Read (read)
49 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
50 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
51 import qualified Database.PostgreSQL.Simple as PGS
55 type NgramsTerms = Text
58 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: id
59 , _ngrams_terms :: terms
63 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
67 type NgramsRead = NgramsPoly (Column PGInt4)
71 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
72 (Column (Nullable PGText))
73 (Column (Nullable PGInt4))
75 type NgramsDb = NgramsPoly Int Text Int
77 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
78 makeLenses ''NgramsPoly
81 ngramsTable :: Table NgramsWrite NgramsRead
82 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
83 , _ngrams_terms = required "terms"
84 , _ngrams_n = required "n"
88 queryNgramsTable :: Query NgramsRead
89 queryNgramsTable = queryTable ngramsTable
91 dbGetNgramsDb :: Cmd err [NgramsDb]
92 dbGetNgramsDb = runOpaQuery queryNgramsTable
94 -- | Main Ngrams Types
96 -- Typed Ngrams localize the context of the ngrams
97 -- ngrams in source field of document has Sources Type
98 -- ngrams in authors field of document has Authors Type
99 -- ngrams in text (title or abstract) of documents has Terms Type
100 data NgramsType = Authors | Institutes | Sources | NgramsTerms
101 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
103 ngramsTypes :: [NgramsType]
104 ngramsTypes = [minBound..]
106 instance ToSchema NgramsType
108 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
111 instance FromJSON NgramsType
112 instance FromJSONKey NgramsType where
113 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
114 instance ToJSON NgramsType
115 instance ToJSONKey NgramsType where
116 toJSONKey = toJSONKeyText (pack . show)
118 newtype NgramsTypeId = NgramsTypeId Int
119 deriving (Eq, Show, Ord, Num)
121 instance ToField NgramsTypeId where
122 toField (NgramsTypeId n) = toField n
124 instance FromField NgramsTypeId where
125 fromField fld mdata = do
126 n <- fromField fld mdata
127 if (n :: Int) > 0 then return $ NgramsTypeId n
130 instance FromHttpApiData NgramsType where
131 parseUrlPiece n = pure $ (read . cs) n
133 instance ToParamSchema NgramsType where
134 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
139 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
141 queryRunnerColumnDefault = fieldQueryRunnerColumn
143 pgNgramsType :: NgramsType -> Column PGInt4
144 pgNgramsType = pgNgramsTypeId . ngramsTypeId
146 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
147 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
149 ngramsTypeId :: NgramsType -> NgramsTypeId
150 ngramsTypeId Authors = 1
151 ngramsTypeId Institutes = 2
152 ngramsTypeId Sources = 3
153 ngramsTypeId NgramsTerms = 4
155 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
156 fromNgramsTypeId id = lookup id $ fromList [(ngramsTypeId nt,nt) | nt <- [minBound .. maxBound] :: [NgramsType]]
158 ------------------------------------------------------------------------
159 -- | TODO put it in Gargantext.Text.Ngrams
160 data Ngrams = Ngrams { _ngramsTerms :: Text
162 } deriving (Generic, Show, Eq, Ord)
165 instance PGS.ToRow Ngrams where
166 toRow (Ngrams t s) = [toField t, toField s]
168 text2ngrams :: Text -> Ngrams
169 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
171 -------------------------------------------------------------------------
172 -- | TODO put it in Gargantext.Text.Ngrams
173 -- Named entity are typed ngrams of Terms Ngrams
175 NgramsT { _ngramsType :: NgramsType
177 } deriving (Generic, Show, Eq, Ord)
181 instance Functor NgramsT where
183 -----------------------------------------------------------------------
187 , _ngramsId :: NgramsId
188 } deriving (Show, Generic, Eq, Ord)
190 makeLenses ''NgramsIndexed
191 ------------------------------------------------------------------------
196 } deriving (Show, Generic, Eq, Ord)
198 instance PGS.FromRow NgramIds where
199 fromRow = NgramIds <$> field <*> field
201 ----------------------
202 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
203 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
205 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
206 indexNgramsT = fmap . indexNgramsWith . withMap
208 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
209 indexNgrams = indexNgramsWith . withMap
211 -- NP: not sure we need it anymore
212 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
213 indexNgramsTWith = fmap . indexNgramsWith
215 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
216 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)
218 -- TODO-ACCESS: access must not be checked here but when insertNgrams is called.
219 insertNgrams :: [Ngrams] -> Cmd err (Map NgramsTerms NgramsId)
220 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
222 -- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
223 insertNgrams' :: [Ngrams] -> Cmd err [NgramIds]
224 insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
226 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
228 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd err ByteString
229 insertNgrams_Debug ns = formatPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
231 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
233 ----------------------
234 queryInsertNgrams :: PGS.Query
235 queryInsertNgrams = [sql|
236 WITH input_rows(terms,n) AS (?)
238 INSERT INTO ngrams (terms,n)
239 SELECT * FROM input_rows
240 ON CONFLICT (terms) DO NOTHING -- unique index created here
249 JOIN ngrams c USING (terms); -- columns of unique index