2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE FunctionalDependencies #-}
18 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
19 {-# LANGUAGE MultiParamTypeClasses #-}
20 {-# LANGUAGE NoImplicitPrelude #-}
21 {-# LANGUAGE OverloadedStrings #-}
22 {-# LANGUAGE QuasiQuotes #-}
23 {-# LANGUAGE RankNTypes #-}
24 {-# LANGUAGE TemplateHaskell #-}
26 module Gargantext.Database.Schema.Ngrams where
28 import Control.Lens (makeLenses, over)
29 import Control.Monad (mzero)
31 import Data.Aeson.Types (toJSONKeyText)
32 import Data.ByteString.Internal (ByteString)
33 import Data.Map (Map, fromList, lookup)
34 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
35 import Data.Text (Text, splitOn, pack)
36 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
37 import Database.PostgreSQL.Simple.SqlQQ (sql)
38 import Database.PostgreSQL.Simple.ToField (toField, ToField)
39 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
40 import Database.PostgreSQL.Simple.ToRow (toRow)
41 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
42 import GHC.Generics (Generic)
43 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, formatPGSQuery)
44 import Gargantext.Core.Types (TODO(..))
45 import Gargantext.Prelude
46 import Opaleye hiding (FromField)
47 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
48 import Text.Read (read)
49 import Data.Swagger (ToParamSchema, toParamSchema)
50 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
51 import qualified Database.PostgreSQL.Simple as PGS
55 type NgramsTerms = Text
58 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: id
59 , _ngrams_terms :: terms
63 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
67 type NgramsRead = NgramsPoly (Column PGInt4)
71 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
72 (Column (Nullable PGText))
73 (Column (Nullable PGInt4))
75 type NgramsDb = NgramsPoly Int Text Int
77 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
78 makeLenses ''NgramsPoly
81 ngramsTable :: Table NgramsWrite NgramsRead
82 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
83 , _ngrams_terms = required "terms"
84 , _ngrams_n = required "n"
88 queryNgramsTable :: Query NgramsRead
89 queryNgramsTable = queryTable ngramsTable
91 dbGetNgramsDb :: Cmd err [NgramsDb]
92 dbGetNgramsDb = runOpaQuery queryNgramsTable
94 -- | Main Ngrams Types
96 -- Typed Ngrams localize the context of the ngrams
97 -- ngrams in source field of document has Sources Type
98 -- ngrams in authors field of document has Authors Type
99 -- ngrams in text (title or abstract) of documents has Terms Type
100 data NgramsType = Authors | Institutes | Sources | NgramsTerms
101 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
103 instance FromJSON NgramsType
104 instance FromJSONKey NgramsType where
105 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
106 instance ToJSON NgramsType
107 instance ToJSONKey NgramsType where
108 toJSONKey = toJSONKeyText (pack . show)
110 newtype NgramsTypeId = NgramsTypeId Int
111 deriving (Eq, Show, Ord, Num)
113 instance ToField NgramsTypeId where
114 toField (NgramsTypeId n) = toField n
116 instance FromField NgramsTypeId where
117 fromField fld mdata = do
118 n <- fromField fld mdata
119 if (n :: Int) > 0 then return $ NgramsTypeId n
122 instance FromHttpApiData NgramsType where
123 parseUrlPiece n = pure $ (read . cs) n
125 instance ToParamSchema NgramsType where
126 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
131 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
133 queryRunnerColumnDefault = fieldQueryRunnerColumn
135 pgNgramsType :: NgramsType -> Column PGInt4
136 pgNgramsType = pgNgramsTypeId . ngramsTypeId
138 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
139 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
141 ngramsTypeId :: NgramsType -> NgramsTypeId
142 ngramsTypeId Authors = 1
143 ngramsTypeId Institutes = 2
144 ngramsTypeId Sources = 3
145 ngramsTypeId NgramsTerms = 4
147 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
148 fromNgramsTypeId id = lookup id $ fromList [(ngramsTypeId nt,nt) | nt <- [minBound .. maxBound] :: [NgramsType]]
150 ------------------------------------------------------------------------
151 -- | TODO put it in Gargantext.Text.Ngrams
152 data Ngrams = Ngrams { _ngramsTerms :: Text
154 } deriving (Generic, Show, Eq, Ord)
157 instance PGS.ToRow Ngrams where
158 toRow (Ngrams t s) = [toField t, toField s]
160 text2ngrams :: Text -> Ngrams
161 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
163 -------------------------------------------------------------------------
164 -- | TODO put it in Gargantext.Text.Ngrams
165 -- Named entity are typed ngrams of Terms Ngrams
167 NgramsT { _ngramsType :: NgramsType
169 } deriving (Generic, Show, Eq, Ord)
173 instance Functor NgramsT where
175 -----------------------------------------------------------------------
179 , _ngramsId :: NgramsId
180 } deriving (Show, Generic, Eq, Ord)
182 makeLenses ''NgramsIndexed
183 ------------------------------------------------------------------------
188 } deriving (Show, Generic, Eq, Ord)
190 instance PGS.FromRow NgramIds where
191 fromRow = NgramIds <$> field <*> field
193 ----------------------
194 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
195 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
197 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
198 indexNgramsT = fmap . indexNgramsWith . withMap
200 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
201 indexNgrams = indexNgramsWith . withMap
203 -- NP: not sure we need it anymore
204 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
205 indexNgramsTWith = fmap . indexNgramsWith
207 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
208 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)
210 -- TODO-ACCESS: access must not be checked here but when insertNgrams is called.
211 insertNgrams :: [Ngrams] -> Cmd err (Map NgramsTerms NgramsId)
212 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
214 -- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
215 insertNgrams' :: [Ngrams] -> Cmd err [NgramIds]
216 insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
218 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
220 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd err ByteString
221 insertNgrams_Debug ns = formatPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
223 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
225 ----------------------
226 queryInsertNgrams :: PGS.Query
227 queryInsertNgrams = [sql|
228 WITH input_rows(terms,n) AS (?)
230 INSERT INTO ngrams (terms,n)
231 SELECT * FROM input_rows
232 ON CONFLICT (terms) DO NOTHING -- unique index created here
241 JOIN ngrams c USING (terms); -- columns of unique index