]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
Merge remote-tracking branch 'origin/flexible-job-queue' into dev
[gargantext.git] / src / Gargantext / Database / Query / Table / Node / Document / Insert.hs
1 {-|
2 Module : Gargantext.Database.Node.Document.Insert
3 Description : Importing context of texts (documents)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 * Purpose of this module
11
12 Enabling "common goods" of text data and respecting privacy.
13
14 Gargantext shares as "common good" the links between context of texts
15 and terms / words / ngrams.
16
17 Basically a context of text can be defined as a document (see 'Gargantext.Core.Text').
18
19 Issue to tackle in that module: each global document of Gargantext has
20 to be unique, then shared, but how to respect privacy if needed ?
21
22
23 * Methodology to get uniqueness and privacy by design
24
25 As a consequence, when importing/inserting a new document in Gargantext,
26 a policy for the uniqueness of the inserted docuemnts has to be defined.
27
28 That is the purpose of this module which defines its main concepts.
29
30 Unique identifier in database is of a 3-tuple of 3 policies that
31 together define uniqueness:
32
33 - Design policy: type of node is needed as TypenameId, that is a
34 Document or Individual or something else;
35
36 - Privacy policy: with ParentId, parent becomes unique, then it enables
37 users to get their own copy without sharing it with all the users of the
38 database (in others words parent_id is necessary to preserve privacy for
39 instance).
40
41 - Hash policy: this UniqId is a sha256 uniq id which is the result of
42 the concatenation of the parameters defined by @shaParameters@.
43
44 > -- * Example
45 > insertTest :: FromRow r => CorpusId -> [Node HyperdataDocument] -> IO [r]
46 > insertTest :: IO [ReturnId]
47 > insertTest = runCmdDev $ insertDocuments 1 452162 hyperdataDocuments
48
49 -}
50 ------------------------------------------------------------------------
51 {-# LANGUAGE QuasiQuotes #-}
52 {-# LANGUAGE TypeSynonymInstances #-}
53 ------------------------------------------------------------------------
54 module Gargantext.Database.Query.Table.Node.Document.Insert
55 where
56
57 import Control.Lens (set, view)
58 import Control.Lens.Cons
59 import Control.Lens.Prism
60 import Data.Aeson (toJSON, encode, ToJSON)
61 import Data.Char (isAlpha)
62 import Data.Maybe (fromMaybe)
63 import Data.Text (Text)
64 -- import Data.ByteString (ByteString)
65 import Data.Time.Segment (jour)
66 import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
67 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
68 -- import Database.PostgreSQL.Simple.ToRow (toRow, ToRow)
69 import Database.PostgreSQL.Simple.SqlQQ
70 import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
71 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
72 import GHC.Generics (Generic)
73 import Gargantext.Core (HasDBid(toDBid))
74 import Gargantext.Database.Admin.Types.Hyperdata
75 import Gargantext.Database.Admin.Types.Node
76 import Gargantext.Database.Prelude (Cmd, runPGSQuery{-, formatPGSQuery-})
77 import Gargantext.Database.Schema.Node (NodePoly(..))
78 import qualified Gargantext.Defaults as Defaults
79 import Gargantext.Prelude
80 import Gargantext.Prelude.Crypto.Hash (hash)
81 import qualified Data.Text as DT (pack, concat, take, filter, toLower)
82
83 {-| To Print result query
84 import Data.ByteString.Internal (ByteString)
85 import Database.PostgreSQL.Simple (formatQuery)
86 -}
87
88 ---------------------------------------------------------------------------
89 -- * Main Insert functions
90
91 -- | Insert Document main function
92 -- UserId : user who is inserting the documents
93 -- ParentId : folder ID which is parent of the inserted documents
94 -- Administrator of the database has to create a uniq index as following SQL command:
95 -- `create unique index on contexts table (typename, parent_id, (hyperdata ->> 'uniqId'));`
96 insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> Maybe ParentId -> [a] -> Cmd err [ReturnId]
97 insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
98 where
99 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
100
101 class InsertDb a
102 where
103 insertDb' :: HasDBid NodeType => UserId -> Maybe ParentId -> a -> [Action]
104
105
106 instance InsertDb HyperdataDocument
107 where
108 insertDb' u p h = [ toField ("" :: Text)
109 , toField $ toDBid NodeDocument
110 , toField u
111 , toField p
112 , toField $ maybe "No Title" (DT.take 255) (_hd_title h)
113 , toField $ _hd_publication_date h -- TODO USE UTCTime
114 , (toField . toJSON) h
115 ]
116
117 instance InsertDb HyperdataContact
118 where
119 insertDb' u p h = [ toField ("" :: Text)
120 , toField $ toDBid NodeContact
121 , toField u
122 , toField p
123 , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
124 , toField $ jour 0 1 1 -- TODO put default date
125 , (toField . toJSON) h
126 ]
127
128 instance ToJSON a => InsertDb (Node a)
129 where
130 insertDb' _u _p (Node _nid hashId t u p n d h) = [ toField hashId
131 , toField t
132 , toField u
133 , toField p
134 , toField n
135 , toField d
136 , (toField . toJSON) h
137 ]
138
139 -- | Debug SQL function
140 --
141 -- to print rendered query (Debug purpose) use @formatQuery@ function.
142 {-
143 insertDocuments_Debug :: (Hyperdata a, ToJSON a, ToRow a, InsertDb [a])
144 => UserId -> ParentId -> [a] -> Cmd err ByteString
145 insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fields inputData)
146 where
147 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
148 inputData = insertDb' uId pId hs
149 -}
150
151 -- | Input Tables: types of the tables
152 inputSqlTypes :: [Text]
153 inputSqlTypes = map DT.pack ["text", "int4","int4","int4","text","date","jsonb"]
154
155 -- | SQL query to insert documents inside the database
156 queryInsert :: Query
157 queryInsert = [sql|
158 WITH input_rows(hash_id,typename,user_id,parent_id,name,date,hyperdata) AS (?)
159 , ins AS (
160 INSERT INTO contexts (hash_id, typename,user_id,parent_id,name,date,hyperdata)
161 SELECT * FROM input_rows
162 ON CONFLICT (hash_id) DO NOTHING -- on unique index -- this does not return the ids
163 RETURNING id,hash_id
164 )
165
166 SELECT true AS source -- true for 'newly inserted'
167 , id
168 , hash_id
169 FROM ins
170 UNION ALL
171 SELECT false AS source -- false for 'not inserted'
172 , n.id
173 , hash_id
174 FROM input_rows
175 JOIN contexts n USING (hash_id); -- columns of unique index
176 |]
177
178 ------------------------------------------------------------------------
179 -- * Main Types used
180 -- ** Return Types
181
182 -- | When documents are inserted
183 -- ReturnType after insertion
184 data ReturnId = ReturnId { reInserted :: Bool -- if the document is inserted (True: is new, False: is not new)
185 , reId :: NodeId -- always return the id of the document (even new or not new)
186 -- this is the uniq id in the database
187 , reUniqId :: Text -- Hash Id with concatenation of sha parameters
188 } deriving (Show, Generic)
189
190 instance FromRow ReturnId where
191 fromRow = ReturnId <$> field <*> field <*> field
192
193 ---------------------------------------------------------------------------
194 -- * Uniqueness of document definition
195
196 class AddUniqId a
197 where
198 addUniqId :: a -> a
199
200 instance AddUniqId HyperdataDocument
201 where
202 addUniqId = addUniqIdsDoc
203 where
204 addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
205 addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
206 $ set hd_uniqId (Just shaUni) doc
207 where
208 shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
209 shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
210
211 shaParametersDoc :: [(HyperdataDocument -> Text)]
212 shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d)
213 , \d -> filterText $ maybeText (_hd_abstract d)
214 , \d -> filterText $ maybeText (_hd_source d)
215 , \d -> maybeText (_hd_publication_date d)
216 ]
217
218 filterText :: Text -> Text
219 filterText = DT.toLower . (DT.filter isAlpha)
220
221 -- TODO put this elsewhere (fix bin/gargantext-init/Main.hs too)
222 secret :: Text
223 secret = "Database secret to change"
224
225
226 instance (AddUniqId a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a)
227 where
228 addUniqId (Node nid _ t u p n d h) = Node nid hashId t u p n d h
229 where
230 hashId = Just $ "\\x" <> (hash $ DT.concat params)
231 params = [ secret
232 , cs $ show $ toDBid NodeDocument
233 , n
234 , cs $ show p
235 , cs $ encode h
236 ]
237 {-
238 addUniqId n@(Node nid _ t u p n d h) =
239 case n of
240 Node HyperdataDocument -> Node nid hashId t u p n d h
241 where
242 hashId = "\\x" <> (hash $ DT.concat params)
243 params = [ secret
244 , cs $ show $ toDBid NodeDocument
245 , n
246 , cs $ show p
247 , cs $ encode h
248 ]
249 _ -> undefined
250 -}
251
252 ---------------------------------------------------------------------------
253 -- * Uniqueness of document definition
254 -- TODO factorize with above (use the function below for tests)
255
256 instance AddUniqId HyperdataContact
257 where
258 addUniqId = addUniqIdsContact
259
260 addUniqIdsContact :: HyperdataContact -> HyperdataContact
261 addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
262 $ set (hc_uniqId ) (Just shaUni) hc
263 where
264 shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact
265 shaBdd = hash $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
266
267 -- | TODO add more shaparameters
268 shaParametersContact :: [(HyperdataContact -> Text)]
269 shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName ) d
270 , \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
271 , \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
272 ]
273
274
275 maybeText :: Maybe Text -> Text
276 maybeText = maybe (DT.pack "") identity
277
278 ---------------------------------------------------------------------------
279 class ToNode a
280 where
281 -- TODO Maybe NodeId
282 toNode :: HasDBid NodeType => UserId -> Maybe ParentId -> a -> Node a
283
284 instance ToNode HyperdataDocument where
285 toNode u p h = Node 0 Nothing (toDBid NodeDocument) u p n date h
286 where
287 n = maybe "No Title" (DT.take 255) (_hd_title h)
288 date = jour y m d
289 -- NOTE: There is no year '0' in postgres, there is year 1 AD and beofre that year 1 BC:
290 -- select '0001-01-01'::date, '0001-01-01'::date - '1 day'::interval;
291 -- 0001-01-01 0001-12-31 00:00:00 BC
292 y = fromIntegral $ fromMaybe Defaults.day $ _hd_publication_year h
293 m = fromMaybe Defaults.month $ _hd_publication_month h
294 d = fromMaybe (fromIntegral Defaults.year) $ _hd_publication_day h
295
296 -- TODO better Node
297 instance ToNode HyperdataContact where
298 toNode u p h = Node 0 Nothing (toDBid NodeContact) u p "Contact" date h
299 where
300 date = jour 2020 01 01
301
302
303