2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Ngrams connection to the Database.
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE FlexibleInstances #-}
18 {-# LANGUAGE FunctionalDependencies #-}
19 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
20 {-# LANGUAGE MultiParamTypeClasses #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE QuasiQuotes #-}
24 {-# LANGUAGE RankNTypes #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Database.Schema.Ngrams where
29 import Control.Lens (makeLenses, over)
30 import Control.Monad (mzero)
32 import Data.Aeson.Types (toJSONKeyText)
33 import Data.ByteString.Internal (ByteString)
34 import Data.Map (Map, fromList, lookup)
35 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
36 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
37 import Data.Text (Text, splitOn, pack)
38 import GHC.Generics (Generic)
39 import Gargantext.Core.Types (TODO(..))
40 import Gargantext.Database.Admin.Utils (Cmd, runPGSQuery, runOpaQuery, formatPGSQuery)
41 import Gargantext.Prelude
42 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
43 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
44 import Text.Read (read)
45 import Gargantext.Database.Schema.Prelude
46 import qualified Database.PostgreSQL.Simple as PGS
50 type NgramsTerms = Text
53 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: !id
54 , _ngrams_terms :: !terms
58 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
62 type NgramsRead = NgramsPoly (Column PGInt4)
66 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
67 (Column (Nullable PGText))
68 (Column (Nullable PGInt4))
70 type NgramsDb = NgramsPoly Int Text Int
72 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
73 makeLenses ''NgramsPoly
76 ngramsTable :: Table NgramsWrite NgramsRead
77 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
78 , _ngrams_terms = required "terms"
79 , _ngrams_n = required "n"
83 queryNgramsTable :: Query NgramsRead
84 queryNgramsTable = queryTable ngramsTable
86 dbGetNgramsDb :: Cmd err [NgramsDb]
87 dbGetNgramsDb = runOpaQuery queryNgramsTable
89 -- | Main Ngrams Types
91 -- Typed Ngrams localize the context of the ngrams
92 -- ngrams in source field of document has Sources Type
93 -- ngrams in authors field of document has Authors Type
94 -- ngrams in text (title or abstract) of documents has Terms Type
95 data NgramsType = Authors | Institutes | Sources | NgramsTerms
96 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
98 ngramsTypes :: [NgramsType]
99 ngramsTypes = [minBound..]
101 instance ToSchema NgramsType
103 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
106 instance FromJSON NgramsType
107 instance FromJSONKey NgramsType where
108 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
109 instance ToJSON NgramsType
110 instance ToJSONKey NgramsType where
111 toJSONKey = toJSONKeyText (pack . show)
113 newtype NgramsTypeId = NgramsTypeId Int
114 deriving (Eq, Show, Ord, Num)
116 instance ToField NgramsTypeId where
117 toField (NgramsTypeId n) = toField n
119 instance FromField NgramsTypeId where
120 fromField fld mdata = do
121 n <- fromField fld mdata
122 if (n :: Int) > 0 then return $ NgramsTypeId n
125 instance FromHttpApiData NgramsType where
126 parseUrlPiece n = pure $ (read . cs) n
128 instance ToParamSchema NgramsType where
129 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
132 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
134 queryRunnerColumnDefault = fieldQueryRunnerColumn
136 pgNgramsType :: NgramsType -> Column PGInt4
137 pgNgramsType = pgNgramsTypeId . ngramsTypeId
139 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
140 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
142 ngramsTypeId :: NgramsType -> NgramsTypeId
143 ngramsTypeId Authors = 1
144 ngramsTypeId Institutes = 2
145 ngramsTypeId Sources = 3
146 ngramsTypeId NgramsTerms = 4
148 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
149 fromNgramsTypeId id = lookup id
150 $ fromList [ (ngramsTypeId nt,nt)
151 | nt <- [minBound .. maxBound] :: [NgramsType]
154 ------------------------------------------------------------------------
155 -- | TODO put it in Gargantext.Text.Ngrams
156 data Ngrams = Ngrams { _ngramsTerms :: Text
158 } deriving (Generic, Show, Eq, Ord)
161 instance PGS.ToRow Ngrams where
162 toRow (Ngrams t s) = [toField t, toField s]
164 text2ngrams :: Text -> Ngrams
165 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
167 -------------------------------------------------------------------------
168 -- | TODO put it in Gargantext.Text.Ngrams
169 -- Named entity are typed ngrams of Terms Ngrams
171 NgramsT { _ngramsType :: NgramsType
173 } deriving (Generic, Show, Eq, Ord)
177 instance Functor NgramsT where
179 -----------------------------------------------------------------------
183 , _ngramsId :: NgramsId
184 } deriving (Show, Generic, Eq, Ord)
186 makeLenses ''NgramsIndexed
187 ------------------------------------------------------------------------
192 } deriving (Show, Generic, Eq, Ord)
194 instance PGS.FromRow NgramIds where
195 fromRow = NgramIds <$> field <*> field
197 ----------------------
198 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
199 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
201 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
202 indexNgramsT = fmap . indexNgramsWith . withMap
204 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
205 indexNgrams = indexNgramsWith . withMap
207 -- NP: not sure we need it anymore
208 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
209 indexNgramsTWith = fmap . indexNgramsWith
211 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
212 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)
214 -- TODO-ACCESS: access must not be checked here but when insertNgrams is called.
215 insertNgrams :: [Ngrams] -> Cmd err (Map NgramsTerms NgramsId)
216 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
218 -- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
219 insertNgrams' :: [Ngrams] -> Cmd err [NgramIds]
220 insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
222 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
224 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd err ByteString
225 insertNgrams_Debug ns = formatPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
227 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
229 ----------------------
230 queryInsertNgrams :: PGS.Query
231 queryInsertNgrams = [sql|
232 WITH input_rows(terms,n) AS (?)
234 INSERT INTO ngrams (terms,n)
235 SELECT * FROM input_rows
236 ON CONFLICT (terms) DO NOTHING -- unique index created here
245 JOIN ngrams c USING (terms); -- columns of unique index