2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE FlexibleInstances #-}
18 {-# LANGUAGE FunctionalDependencies #-}
19 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
20 {-# LANGUAGE MultiParamTypeClasses #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE QuasiQuotes #-}
24 {-# LANGUAGE RankNTypes #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Database.Schema.Ngrams where
29 import Control.Lens (makeLenses, over)
30 import Control.Monad (mzero)
32 import Data.Aeson.Types (toJSONKeyText)
33 import Data.ByteString.Internal (ByteString)
34 import Data.Map (Map, fromList, lookup)
35 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
36 import Data.Text (Text, splitOn, pack)
37 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
38 import Database.PostgreSQL.Simple.SqlQQ (sql)
39 import Database.PostgreSQL.Simple.ToField (toField, ToField)
40 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
41 import Database.PostgreSQL.Simple.ToRow (toRow)
42 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
43 import GHC.Generics (Generic)
44 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, formatPGSQuery)
45 import Gargantext.Core.Types (TODO(..))
46 import Gargantext.Prelude
47 import Opaleye hiding (FromField)
48 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
49 import Text.Read (read)
50 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
51 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
52 import qualified Database.PostgreSQL.Simple as PGS
56 type NgramsTerms = Text
59 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: id
60 , _ngrams_terms :: terms
64 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
68 type NgramsRead = NgramsPoly (Column PGInt4)
72 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
73 (Column (Nullable PGText))
74 (Column (Nullable PGInt4))
76 type NgramsDb = NgramsPoly Int Text Int
78 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
79 makeLenses ''NgramsPoly
82 ngramsTable :: Table NgramsWrite NgramsRead
83 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
84 , _ngrams_terms = required "terms"
85 , _ngrams_n = required "n"
89 queryNgramsTable :: Query NgramsRead
90 queryNgramsTable = queryTable ngramsTable
92 dbGetNgramsDb :: Cmd err [NgramsDb]
93 dbGetNgramsDb = runOpaQuery queryNgramsTable
95 -- | Main Ngrams Types
97 -- Typed Ngrams localize the context of the ngrams
98 -- ngrams in source field of document has Sources Type
99 -- ngrams in authors field of document has Authors Type
100 -- ngrams in text (title or abstract) of documents has Terms Type
101 data NgramsType = Authors | Institutes | Sources | NgramsTerms
102 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
104 ngramsTypes :: [NgramsType]
105 ngramsTypes = [minBound..]
107 instance ToSchema NgramsType
109 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
112 instance FromJSON NgramsType
113 instance FromJSONKey NgramsType where
114 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
115 instance ToJSON NgramsType
116 instance ToJSONKey NgramsType where
117 toJSONKey = toJSONKeyText (pack . show)
119 newtype NgramsTypeId = NgramsTypeId Int
120 deriving (Eq, Show, Ord, Num)
122 instance ToField NgramsTypeId where
123 toField (NgramsTypeId n) = toField n
125 instance FromField NgramsTypeId where
126 fromField fld mdata = do
127 n <- fromField fld mdata
128 if (n :: Int) > 0 then return $ NgramsTypeId n
131 instance FromHttpApiData NgramsType where
132 parseUrlPiece n = pure $ (read . cs) n
134 instance ToParamSchema NgramsType where
135 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
140 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
142 queryRunnerColumnDefault = fieldQueryRunnerColumn
144 pgNgramsType :: NgramsType -> Column PGInt4
145 pgNgramsType = pgNgramsTypeId . ngramsTypeId
147 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
148 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
150 ngramsTypeId :: NgramsType -> NgramsTypeId
151 ngramsTypeId Authors = 1
152 ngramsTypeId Institutes = 2
153 ngramsTypeId Sources = 3
154 ngramsTypeId NgramsTerms = 4
156 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
157 fromNgramsTypeId id = lookup id $ fromList [(ngramsTypeId nt,nt) | nt <- [minBound .. maxBound] :: [NgramsType]]
159 ------------------------------------------------------------------------
160 -- | TODO put it in Gargantext.Text.Ngrams
161 data Ngrams = Ngrams { _ngramsTerms :: Text
163 } deriving (Generic, Show, Eq, Ord)
166 instance PGS.ToRow Ngrams where
167 toRow (Ngrams t s) = [toField t, toField s]
169 text2ngrams :: Text -> Ngrams
170 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
172 -------------------------------------------------------------------------
173 -- | TODO put it in Gargantext.Text.Ngrams
174 -- Named entity are typed ngrams of Terms Ngrams
176 NgramsT { _ngramsType :: NgramsType
178 } deriving (Generic, Show, Eq, Ord)
182 instance Functor NgramsT where
184 -----------------------------------------------------------------------
188 , _ngramsId :: NgramsId
189 } deriving (Show, Generic, Eq, Ord)
191 makeLenses ''NgramsIndexed
192 ------------------------------------------------------------------------
197 } deriving (Show, Generic, Eq, Ord)
199 instance PGS.FromRow NgramIds where
200 fromRow = NgramIds <$> field <*> field
202 ----------------------
203 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
204 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
206 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
207 indexNgramsT = fmap . indexNgramsWith . withMap
209 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
210 indexNgrams = indexNgramsWith . withMap
212 -- NP: not sure we need it anymore
213 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
214 indexNgramsTWith = fmap . indexNgramsWith
216 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
217 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)
219 -- TODO-ACCESS: access must not be checked here but when insertNgrams is called.
220 insertNgrams :: [Ngrams] -> Cmd err (Map NgramsTerms NgramsId)
221 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
223 -- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
224 insertNgrams' :: [Ngrams] -> Cmd err [NgramIds]
225 insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
227 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
229 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd err ByteString
230 insertNgrams_Debug ns = formatPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
232 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
234 ----------------------
235 queryInsertNgrams :: PGS.Query
236 queryInsertNgrams = [sql|
237 WITH input_rows(terms,n) AS (?)
239 INSERT INTO ngrams (terms,n)
240 SELECT * FROM input_rows
241 ON CONFLICT (terms) DO NOTHING -- unique index created here
250 JOIN ngrams c USING (terms); -- columns of unique index