]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Schema/Ngrams.hs
[FACTO/WIP] files org and import fix in Database/*
[gargantext.git] / src / Gargantext / Database / Schema / Ngrams.hs
1 {-|
2 Module : Gargantext.Database.Schema.Ngrams
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams connection to the Database.
11
12 -}
13
14 {-# LANGUAGE Arrows #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE FlexibleInstances #-}
18 {-# LANGUAGE FunctionalDependencies #-}
19 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
20 {-# LANGUAGE MultiParamTypeClasses #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE QuasiQuotes #-}
24 {-# LANGUAGE RankNTypes #-}
25 {-# LANGUAGE TemplateHaskell #-}
26
27 module Gargantext.Database.Schema.Ngrams where
28
29 import Control.Lens (makeLenses, over)
30 import Control.Monad (mzero)
31 import Data.Aeson
32 import Data.Aeson.Types (toJSONKeyText)
33 import Data.ByteString.Internal (ByteString)
34 import Data.Map (Map, fromList, lookup)
35 import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
36 import Data.Swagger (ToParamSchema, toParamSchema, ToSchema)
37 import Data.Text (Text, splitOn, pack)
38 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
39 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
40 import Database.PostgreSQL.Simple.SqlQQ (sql)
41 import Database.PostgreSQL.Simple.ToField (toField, ToField)
42 import Database.PostgreSQL.Simple.ToRow (toRow)
43 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
44 import GHC.Generics (Generic)
45 import Gargantext.Core.Types (TODO(..))
46 import Gargantext.Database.Admin.Utils (Cmd, runPGSQuery, runOpaQuery, formatPGSQuery)
47 import Gargantext.Prelude
48 import Opaleye hiding (FromField)
49 import Prelude (Enum, Bounded, minBound, maxBound, Functor)
50 import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
51 import Text.Read (read)
52 import qualified Database.PostgreSQL.Simple as PGS
53
54
55 type NgramsId = Int
56 type NgramsTerms = Text
57 type Size = Int
58
59 data NgramsPoly id terms n = NgramsDb { _ngrams_id :: id
60 , _ngrams_terms :: terms
61 , _ngrams_n :: n
62 } deriving (Show)
63
64 type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
65 (Column PGText)
66 (Column PGInt4)
67
68 type NgramsRead = NgramsPoly (Column PGInt4)
69 (Column PGText)
70 (Column PGInt4)
71
72 type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
73 (Column (Nullable PGText))
74 (Column (Nullable PGInt4))
75
76 type NgramsDb = NgramsPoly Int Text Int
77
78 $(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
79 makeLenses ''NgramsPoly
80
81
82 ngramsTable :: Table NgramsWrite NgramsRead
83 ngramsTable = Table "ngrams" (pNgramsDb NgramsDb { _ngrams_id = optional "id"
84 , _ngrams_terms = required "terms"
85 , _ngrams_n = required "n"
86 }
87 )
88
89 queryNgramsTable :: Query NgramsRead
90 queryNgramsTable = queryTable ngramsTable
91
92 dbGetNgramsDb :: Cmd err [NgramsDb]
93 dbGetNgramsDb = runOpaQuery queryNgramsTable
94
95 -- | Main Ngrams Types
96 -- | Typed Ngrams
97 -- Typed Ngrams localize the context of the ngrams
98 -- ngrams in source field of document has Sources Type
99 -- ngrams in authors field of document has Authors Type
100 -- ngrams in text (title or abstract) of documents has Terms Type
101 data NgramsType = Authors | Institutes | Sources | NgramsTerms
102 deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
103
104 ngramsTypes :: [NgramsType]
105 ngramsTypes = [minBound..]
106
107 instance ToSchema NgramsType
108 {- where
109 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
110 -}
111
112 instance FromJSON NgramsType
113 instance FromJSONKey NgramsType where
114 fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
115 instance ToJSON NgramsType
116 instance ToJSONKey NgramsType where
117 toJSONKey = toJSONKeyText (pack . show)
118
119 newtype NgramsTypeId = NgramsTypeId Int
120 deriving (Eq, Show, Ord, Num)
121
122 instance ToField NgramsTypeId where
123 toField (NgramsTypeId n) = toField n
124
125 instance FromField NgramsTypeId where
126 fromField fld mdata = do
127 n <- fromField fld mdata
128 if (n :: Int) > 0 then return $ NgramsTypeId n
129 else mzero
130
131 instance FromHttpApiData NgramsType where
132 parseUrlPiece n = pure $ (read . cs) n
133
134 instance ToParamSchema NgramsType where
135 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
136
137
138
139
140 instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
141 where
142 queryRunnerColumnDefault = fieldQueryRunnerColumn
143
144 pgNgramsType :: NgramsType -> Column PGInt4
145 pgNgramsType = pgNgramsTypeId . ngramsTypeId
146
147 pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
148 pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
149
150 ngramsTypeId :: NgramsType -> NgramsTypeId
151 ngramsTypeId Authors = 1
152 ngramsTypeId Institutes = 2
153 ngramsTypeId Sources = 3
154 ngramsTypeId NgramsTerms = 4
155
156 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
157 fromNgramsTypeId id = lookup id $ fromList [(ngramsTypeId nt,nt) | nt <- [minBound .. maxBound] :: [NgramsType]]
158
159 ------------------------------------------------------------------------
160 -- | TODO put it in Gargantext.Text.Ngrams
161 data Ngrams = Ngrams { _ngramsTerms :: Text
162 , _ngramsSize :: Int
163 } deriving (Generic, Show, Eq, Ord)
164
165 makeLenses ''Ngrams
166 instance PGS.ToRow Ngrams where
167 toRow (Ngrams t s) = [toField t, toField s]
168
169 text2ngrams :: Text -> Ngrams
170 text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
171
172 -------------------------------------------------------------------------
173 -- | TODO put it in Gargantext.Text.Ngrams
174 -- Named entity are typed ngrams of Terms Ngrams
175 data NgramsT a =
176 NgramsT { _ngramsType :: NgramsType
177 , _ngramsT :: a
178 } deriving (Generic, Show, Eq, Ord)
179
180 makeLenses ''NgramsT
181
182 instance Functor NgramsT where
183 fmap = over ngramsT
184 -----------------------------------------------------------------------
185 data NgramsIndexed =
186 NgramsIndexed
187 { _ngrams :: Ngrams
188 , _ngramsId :: NgramsId
189 } deriving (Show, Generic, Eq, Ord)
190
191 makeLenses ''NgramsIndexed
192 ------------------------------------------------------------------------
193 data NgramIds =
194 NgramIds
195 { ngramId :: Int
196 , ngramTerms :: Text
197 } deriving (Show, Generic, Eq, Ord)
198
199 instance PGS.FromRow NgramIds where
200 fromRow = NgramIds <$> field <*> field
201
202 ----------------------
203 withMap :: Map NgramsTerms NgramsId -> NgramsTerms -> NgramsId
204 withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m)
205
206 indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
207 indexNgramsT = fmap . indexNgramsWith . withMap
208
209 indexNgrams :: Map NgramsTerms NgramsId -> Ngrams -> NgramsIndexed
210 indexNgrams = indexNgramsWith . withMap
211
212 -- NP: not sure we need it anymore
213 indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams -> NgramsT NgramsIndexed
214 indexNgramsTWith = fmap . indexNgramsWith
215
216 indexNgramsWith :: (NgramsTerms -> NgramsId) -> Ngrams -> NgramsIndexed
217 indexNgramsWith f n = NgramsIndexed n (f $ _ngramsTerms n)
218
219 -- TODO-ACCESS: access must not be checked here but when insertNgrams is called.
220 insertNgrams :: [Ngrams] -> Cmd err (Map NgramsTerms NgramsId)
221 insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
222
223 -- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
224 insertNgrams' :: [Ngrams] -> Cmd err [NgramIds]
225 insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
226 where
227 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
228
229 insertNgrams_Debug :: [(NgramsTerms, Size)] -> Cmd err ByteString
230 insertNgrams_Debug ns = formatPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
231 where
232 fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
233
234 ----------------------
235 queryInsertNgrams :: PGS.Query
236 queryInsertNgrams = [sql|
237 WITH input_rows(terms,n) AS (?)
238 , ins AS (
239 INSERT INTO ngrams (terms,n)
240 SELECT * FROM input_rows
241 ON CONFLICT (terms) DO NOTHING -- unique index created here
242 RETURNING id,terms
243 )
244
245 SELECT id, terms
246 FROM ins
247 UNION ALL
248 SELECT c.id, terms
249 FROM input_rows
250 JOIN ngrams c USING (terms); -- columns of unique index
251 |]
252
253
254