]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Node/Document/Insert.hs
[READING] proposal: keeping data consistency with database.
[gargantext.git] / src / Gargantext / Database / Node / Document / Insert.hs
1 {-|
2 Module : Gargantext.Database.Node.Document.Insert
3 Description : Importing context of texts (documents)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 * Purpose of this module
11
12 Enabling "common goods" of text data and respecting privacy.
13
14 Gargantext shares as "common good" the links between context of texts
15 and terms / words / ngrams.
16
17 Basically a context of text can be defined as a document (see 'Gargantext.Text').
18
19 Issue to tackle in that module: each global document of Gargantext has
20 to be unique, then shared, but how to respect privacy if needed ?
21
22
23 * Methodology to get uniqueness and privacy by design
24
25 As a consequence, when importing/inserting a new document in Gargantext,
a policy for the uniqueness of the inserted documents has to be defined.
27
28 That is the purpose of this module which defines its main concepts.
29
30 Unique identifier in database is of a 3-tuple of 3 policies that
31 together define uniqueness:
32
33 - Design policy: type of node is needed as TypenameId, that is a
34 Document or Individual or something else;
35
36 - Privacy policy: with ParentId, parent becomes unique, then it enables
37 users to get their own copy without sharing it with all the users of the
database (in other words, parent_id is necessary to preserve privacy, for
39 instance).
40
- Hash policy: this UniqId is a sha256 hash of the concatenation of
the parameters defined by @hashParameters@.
43
> -- * Example
> insertTest :: IO [ReturnId]
> insertTest = runCmdDev $ insertDocuments 1 452162 hyperdataDocuments
48
49 -}
50 ------------------------------------------------------------------------
51 {-# LANGUAGE DeriveDataTypeable #-}
52 {-# LANGUAGE DeriveGeneric #-}
53 {-# LANGUAGE FlexibleInstances #-}
54 {-# LANGUAGE NoImplicitPrelude #-}
55 {-# LANGUAGE OverloadedStrings #-}
56 {-# LANGUAGE QuasiQuotes #-}
57 {-# LANGUAGE RankNTypes #-}
58 {-# LANGUAGE TypeSynonymInstances #-}
59 ------------------------------------------------------------------------
60 module Gargantext.Database.Node.Document.Insert where
61
62 import Control.Lens (set, view)
63 import Control.Lens.Prism
64 import Control.Lens.Cons
65 import Data.Aeson (toJSON, Value)
66 import Data.Maybe (maybe)
67 import Data.Text (Text)
68 import Data.Typeable (Typeable)
69 import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
70 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
71 import Database.PostgreSQL.Simple.SqlQQ
72 import Database.PostgreSQL.Simple.ToField (toField)
73 import Database.PostgreSQL.Simple.ToRow (ToRow(..))
74 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
75 import GHC.Generics (Generic)
76 import Gargantext.Database.Config (nodeTypeId)
77 import Gargantext.Database.Utils (Cmd, runPGSQuery)
78 import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..))
79 import Gargantext.Database.Types.Node
80 import Gargantext.Prelude
81 import qualified Data.ByteString.Lazy.Char8 as DC (pack)
82 import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
83 import qualified Data.Text as DT (pack, unpack, concat, take)
84
85 -- TODO : the import of Document constructor below does not work
86 -- import Gargantext.Database.Types.Node (Document)
87 --import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
88 -- , hyperdataDocument_uniqId
89 -- , hyperdataDocument_title
90 -- , hyperdataDocument_abstract
91 -- , hyperdataDocument_source
92 -- , Node(..), node_typename
93 -- , node_userId
94 -- , node_parentId, node_name, node_hyperdata, hyperdataDocuments
95 -- , NodeTypeId
96 -- )
97 {-| To Print result query
98 import Data.ByteString.Internal (ByteString)
99 import Database.PostgreSQL.Simple (formatQuery)
100 -}
101
102 ---------------------------------------------------------------------------
103 -- * Main Insert functions
104
105 -- ** Database configuration
106 -- Administrator of the database has to create a uniq index as following SQL command:
107 -- `create unique index on nodes (typename, parent_id, (hyperdata ->> 'uniqId'));`
108
109 -- | Insert Document main function
110 -- UserId : user who is inserting the documents
111 -- ParentId : folder ID which is parent of the inserted documents
112
113
114 data ToDbData = ToDbDocument HyperdataDocument | ToDbContact HyperdataContact
115
116 -- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
117 -- TODO-EVENTS: InsertedNodes
-- | Insert the given payloads under one parent node, returning one
-- 'ReturnId' per payload (see 'queryInsert' for the dedup semantics).
insertDocuments :: UserId -> ParentId -> NodeType -> [ToDbData] -> Cmd err [ReturnId]
insertDocuments uId pId nodeType hs =
  runPGSQuery queryInsert (Only $ Values sqlFields (prepare uId pId nodeType hs))
  where
    -- One qualified identifier per input column type.
    sqlFields = QualifiedIdentifier Nothing <$> inputSqlTypes
123
124 -- | Debug SQL function
125 --
126 -- to print rendered query (Debug purpose) use @formatQuery@ function.
127 {-
128 insertDocuments_Debug :: (Hyperdata a, ToJSON a, ToRow a) => UserId -> ParentId -> [a] -> Cmd ByteString
129 insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fields inputData)
130 where
131 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
132 inputData = prepare uId pId hs
133 -}
134
135
-- | Postgres types of the columns in the VALUES rows fed to
-- 'queryInsert' (typename, user_id, parent_id, name, hyperdata).
inputSqlTypes :: [Text]
inputSqlTypes = DT.pack <$> ["int4", "int4", "int4", "text", "jsonb"]
139
-- | SQL query to insert documents inside the database.
--
-- Inserts the input rows, skipping (DO NOTHING) any row whose
-- @hyperdata ->> 'uniqIdBdd'@ already exists — this relies on the
-- unique index the database administrator must create (see the note at
-- the top of this section).  The query returns one row per input:
-- @true@ plus the fresh id for newly inserted rows, @false@ plus the
-- pre-existing id for duplicates.
--
-- NOTE(review): the final @JOIN nodes c USING (hyperdata)@ matches on
-- the whole jsonb column rather than on the unique-index expression —
-- confirm this actually pairs each skipped input row with its existing
-- duplicate.
queryInsert :: Query
queryInsert = [sql|
    WITH input_rows(typename,user_id,parent_id,name,hyperdata) AS (?)
    , ins AS (
       INSERT INTO nodes (typename,user_id,parent_id,name,hyperdata)
       SELECT * FROM input_rows
       ON CONFLICT ((hyperdata ->> 'uniqIdBdd')) DO NOTHING -- on unique index
       -- ON CONFLICT (typename, parent_id, (hyperdata ->> 'uniqId')) DO NOTHING -- on unique index
       RETURNING id,hyperdata
    )

    SELECT true AS source -- true for 'newly inserted'
         , id
         , hyperdata ->> 'uniqId'  as doi
    FROM   ins
    UNION  ALL
    SELECT false AS source -- false for 'not inserted'
         , c.id
         , hyperdata ->> 'uniqId' as doi
    FROM   input_rows
    JOIN   nodes c USING (hyperdata); -- columns of unique index
           |]
163
-- | Build one 'InputData' row per payload; all rows share the same
-- typename, user and parent ids.
prepare :: UserId -> ParentId -> NodeType -> [ToDbData] -> [InputData]
prepare uId pId nodeType = map mkRow
  where
    typenameId = nodeTypeId nodeType

    mkRow h = InputData typenameId uId pId (mkName h) (encode' h)

    encode' (ToDbDocument hd) = toJSON hd
    encode' (ToDbContact hc)  = toJSON hc

    -- Truncate to 255 chars so the name fits the nodes.name column.
    mkName h = DT.take 255 (maybe "No Title" identity (title h))

    title (ToDbDocument hd) = _hyperdataDocument_title hd
    title (ToDbContact _ )  = Just "Contact" -- TODO view FirstName . LastName
176
177 ------------------------------------------------------------------------
178 -- * Main Types used
179
180 -- ** Return Types
181
-- | Result of one attempted document insertion, as returned by
-- 'queryInsert'.
data ReturnId = ReturnId { reInserted :: Bool -- ^ if the document is inserted (True: is new, False: is not new)
                         , reId       :: NodeId -- ^ always return the id of the document (even new or not new)
                                                -- this is the uniq id in the database
                         , reUniqId   :: Text -- ^ Hash Id with concatenation of hash parameters
                         } deriving (Show, Generic)
189
-- | Field order must match the SELECT column order of 'queryInsert':
-- (source, id, doi).
instance FromRow ReturnId where
  fromRow = ReturnId <$> field <*> field <*> field
192
193 -- ** Insert Types
194
-- | One row of the VALUES payload sent to 'queryInsert'; field order
-- matches the (typename,user_id,parent_id,name,hyperdata) columns and
-- the types listed in 'inputSqlTypes'.
data InputData = InputData { inTypenameId :: NodeTypeId
                           , inUserId     :: UserId
                           , inParentId   :: ParentId
                           , inName       :: Text
                           , inHyper      :: Value
                           } deriving (Show, Generic, Typeable)
201
-- | Serialise fields in the exact column order expected by
-- 'queryInsert'.
instance ToRow InputData where
  toRow (InputData tId uId pId name hyper) =
    [ toField tId
    , toField uId
    , toField pId
    , toField name
    , toField hyper
    ]
209
210 ---------------------------------------------------------------------------
211 -- * Uniqueness of document definition
212
-- | Set both hash ids on a document: @uniqId@ hashes the fields of
-- 'hashParametersDoc'; @uniqIdBdd@ additionally prepends the source
-- database (@bdd@) field.
addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
addUniqIdsDoc doc = set hyperdataDocument_uniqIdBdd (Just shaBdd)
                  $ set hyperdataDocument_uniqId    (Just shaUniq) doc
  where
    shaUniq = sha $ DT.concat [ p doc | p <- hashParametersDoc ]
    shaBdd  = sha $ DT.concat [ p doc | p <- bddParam : hashParametersDoc ]

    bddParam d = maybe' (_hyperdataDocument_bdd d)

    -- sha256 digest rendered as a hex string.
    sha :: Text -> Text
    sha = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
222
223
-- | Document fields whose concatenation defines the uniqueness hash;
-- missing fields contribute the empty string (see 'maybe'').
hashParametersDoc :: [(HyperdataDocument -> Text)]
hashParametersDoc = [ maybe' . _hyperdataDocument_title
                    , maybe' . _hyperdataDocument_abstract
                    , maybe' . _hyperdataDocument_source
                    , maybe' . _hyperdataDocument_publication_date
                    ]
230 ---------------------------------------------------------------------------
-- * Uniqueness of contact definition
232 -- TODO factorize with above (use the function below for tests)
-- | Set both hash ids on a contact: @uniqId@ hashes the fields of
-- 'hashParametersContact'; @uniqIdBdd@ additionally prepends the
-- source database (@bdd@) field.
addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set hc_uniqIdBdd (Just shaBdd)
                     $ set hc_uniqId    (Just shaUniq) hc
  where
    shaUniq = sha $ DT.concat [ p hc | p <- hashParametersContact ]
    shaBdd  = sha $ DT.concat [ p hc | p <- bddParam : hashParametersContact ]

    bddParam d = maybe' (view hc_bdd d)

    -- sha256 digest rendered as a hex string.
    sha :: Text -> Text
    sha = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
242
-- | Contact fields whose concatenation defines the uniqueness hash;
-- missing fields contribute the empty string (see 'maybe'').
-- TODO add more hashparameters
hashParametersContact :: [(HyperdataContact -> Text)]
hashParametersContact = [ maybe' . view (hc_who   . _Just . cw_firstName)
                        , maybe' . view (hc_who   . _Just . cw_lastName )
                        , maybe' . view (hc_where . _head . cw_touch . _Just . ct_mail)
                        ]
249
250
251
-- | Render a missing field as the empty string, so absent fields still
-- contribute (nothing) to the concatenated hash input.
maybe' :: Maybe Text -> Text
maybe' Nothing  = DT.pack ""
maybe' (Just t) = t
254
255 ---------------------------------------------------------------------------
256