]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
Merge branch 'dev-doc-table-optimization' of ssh://gitlab.iscpif.fr:20022/gargantext...
[gargantext.git] / src / Gargantext / Database / Query / Table / Node / Document / Insert.hs
1 {-|
2 Module : Gargantext.Database.Node.Document.Insert
3 Description : Importing context of texts (documents)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 * Purpose of this module
11
12 Enabling "common goods" of text data and respecting privacy.
13
14 Gargantext shares as "common good" the links between context of texts
15 and terms / words / ngrams.
16
17 Basically a context of text can be defined as a document (see 'Gargantext.Text').
18
19 Issue to tackle in that module: each global document of Gargantext has
20 to be unique, then shared, but how to respect privacy if needed ?
21
22
23 * Methodology to get uniqueness and privacy by design
24
25 As a consequence, when importing/inserting a new document in Gargantext,
26 a policy for the uniqueness of the inserted docuemnts has to be defined.
27
28 That is the purpose of this module which defines its main concepts.
29
30 Unique identifier in database is of a 3-tuple of 3 policies that
31 together define uniqueness:
32
33 - Design policy: type of node is needed as TypenameId, that is a
34 Document or Individual or something else;
35
36 - Privacy policy: with ParentId, parent becomes unique, then it enables
37 users to get their own copy without sharing it with all the users of the
38 database (in others words parent_id is necessary to preserve privacy for
39 instance).
40
41 - Hash policy: this UniqId is a sha256 uniq id which is the result of
42 the concatenation of the parameters defined by @shaParameters@.
43
44 > -- * Example
45 > insertTest :: FromRow r => CorpusId -> [Node HyperdataDocument] -> IO [r]
46 > insertTest :: IO [ReturnId]
47 > insertTest = runCmdDev $ insertDocuments 1 452162 hyperdataDocuments
48
49 -}
50 ------------------------------------------------------------------------
51 {-# LANGUAGE QuasiQuotes #-}
52 {-# LANGUAGE TypeSynonymInstances #-}
53 ------------------------------------------------------------------------
54 module Gargantext.Database.Query.Table.Node.Document.Insert
55 where
56
57 import Control.Lens (set, view)
58 import Control.Lens.Cons
59 import Control.Lens.Prism
60 import Data.Aeson (toJSON)
61 import Data.Maybe (maybe)
62 import Data.Text (Text)
63 import Data.Time.Segment (jour)
64 import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
65 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
66 import Database.PostgreSQL.Simple.SqlQQ
67 import Database.PostgreSQL.Simple.ToField (toField, Action)
68 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
69 import GHC.Generics (Generic)
70 import qualified Data.ByteString.Lazy.Char8 as DC (pack)
71 import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
72 import qualified Data.Text as DT (pack, unpack, concat, take)
73
74 import Gargantext.Database.Query.Table.Node.Contact -- (HyperdataContact(..), ContactWho(..))
75 import Gargantext.Database.Admin.Config (nodeTypeId)
76 import Gargantext.Database.Admin.Types.Hyperdata
77 import Gargantext.Database.Admin.Types.Node
78 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
79 import Gargantext.Prelude
80 import Gargantext.Prelude.Utils (sha)
81
82 -- TODO : the import of Document constructor below does not work
83 -- import Gargantext.Database.Types.Node (Document)
84 --import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
85 -- , hyperdataDocument_uniqId
86 -- , hyperdataDocument_title
87 -- , hyperdataDocument_abstract
88 -- , hyperdataDocument_source
89 -- , Node(..), node_typename
90 -- , node_userId
91 -- , node_parentId, node_name, node_hyperdata, hyperdataDocuments
92 -- , NodeTypeId
93 -- )
94 {-| To Print result query
95 import Data.ByteString.Internal (ByteString)
96 import Database.PostgreSQL.Simple (formatQuery)
97 -}
98
99 ---------------------------------------------------------------------------
100 -- * Main Insert functions
101
102 -- | Insert Document main function
103 -- UserId : user who is inserting the documents
104 -- ParentId : folder ID which is parent of the inserted documents
105 -- Administrator of the database has to create a uniq index as following SQL command:
106 -- `create unique index on nodes (typename, parent_id, (hyperdata ->> 'uniqId'));`
107 insertDb :: InsertDb a => UserId -> ParentId -> [a] -> Cmd err [ReturnId]
108 insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
109 where
110 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
111
112 class InsertDb a
113 where
114 insertDb' :: UserId -> ParentId -> a -> [Action]
115
116
117 instance InsertDb HyperdataDocument
118 where
119 insertDb' u p h = [ toField $ nodeTypeId NodeDocument
120 , toField u
121 , toField p
122 , toField $ maybe "No Title" (DT.take 255) (_hyperdataDocument_title h)
123 , toField $ _hyperdataDocument_publication_date h -- TODO USE UTCTime
124 , (toField . toJSON) h
125 ]
126
127 instance InsertDb HyperdataContact
128 where
129 insertDb' u p h = [ toField $ nodeTypeId NodeContact
130 , toField u
131 , toField p
132 , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
133 , toField $ jour 2010 1 1 -- TODO put default date
134 , (toField . toJSON) h
135 ]
136
137 -- | Debug SQL function
138 --
139 -- to print rendered query (Debug purpose) use @formatQuery@ function.
140 {-
141 insertDocuments_Debug :: (Hyperdata a, ToJSON a, ToRow a) => UserId -> ParentId -> [a] -> Cmd ByteString
142 insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fields inputData)
143 where
144 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
145 inputData = prepare uId pId hs
146 -}
147
148
149 -- | Input Tables: types of the tables
150 inputSqlTypes :: [Text]
151 inputSqlTypes = map DT.pack ["int4","int4","int4","text","date","jsonb"]
152
153 -- | SQL query to insert documents inside the database
154 queryInsert :: Query
155 queryInsert = [sql|
156 WITH input_rows(typename,user_id,parent_id,name,date,hyperdata) AS (?)
157 , ins AS (
158 INSERT INTO nodes (typename,user_id,parent_id,name,date,hyperdata)
159 SELECT * FROM input_rows
160 ON CONFLICT ((hyperdata ->> 'uniqIdBdd')) DO NOTHING -- on unique index
161 -- ON CONFLICT (typename, parent_id, (hyperdata ->> 'uniqId')) DO NOTHING -- on unique index
162 RETURNING id,hyperdata
163 )
164
165 SELECT true AS source -- true for 'newly inserted'
166 , id
167 , hyperdata ->> 'uniqId' as doi
168 FROM ins
169 UNION ALL
170 SELECT false AS source -- false for 'not inserted'
171 , c.id
172 , hyperdata ->> 'uniqId' as doi
173 FROM input_rows
174 JOIN nodes c USING (hyperdata); -- columns of unique index
175 |]
176
177 ------------------------------------------------------------------------
178 -- * Main Types used
179
180 -- ** Return Types
181
182 -- | When documents are inserted
183 -- ReturnType after insertion
184 data ReturnId = ReturnId { reInserted :: Bool -- if the document is inserted (True: is new, False: is not new)
185 , reId :: NodeId -- always return the id of the document (even new or not new)
186 -- this is the uniq id in the database
187 , reUniqId :: Text -- Hash Id with concatenation of sha parameters
188 } deriving (Show, Generic)
189
190 instance FromRow ReturnId where
191 fromRow = ReturnId <$> field <*> field <*> field
192
193 ---------------------------------------------------------------------------
194 -- * Uniqueness of document definition
195
196 class AddUniqId a
197 where
198 addUniqId :: a -> a
199
200 instance AddUniqId HyperdataDocument
201 where
202 addUniqId = addUniqIdsDoc
203 where
204 addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
205 addUniqIdsDoc doc = set hyperdataDocument_uniqIdBdd (Just shaBdd)
206 $ set hyperdataDocument_uniqId (Just shaUni) doc
207 where
208 shaUni = sha $ DT.concat $ map ($ doc) shaParametersDoc
209 shaBdd = sha $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hyperdataDocument_bdd d))] <> shaParametersDoc)
210
211 shaParametersDoc :: [(HyperdataDocument -> Text)]
212 shaParametersDoc = [ \d -> maybeText (_hyperdataDocument_title d)
213 , \d -> maybeText (_hyperdataDocument_abstract d)
214 , \d -> maybeText (_hyperdataDocument_source d)
215 , \d -> maybeText (_hyperdataDocument_publication_date d)
216 ]
217
218 ---------------------------------------------------------------------------
219 -- * Uniqueness of document definition
220 -- TODO factorize with above (use the function below for tests)
221
222 instance AddUniqId HyperdataContact
223 where
224 addUniqId = addUniqIdsContact
225
226 addUniqIdsContact :: HyperdataContact -> HyperdataContact
227 addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
228 $ set (hc_uniqId ) (Just shaUni) hc
229 where
230 shaUni = uniqId $ DT.concat $ map ($ hc) shaParametersContact
231 shaBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
232
233 uniqId :: Text -> Text
234 uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
235
236 -- | TODO add more shaparameters
237 shaParametersContact :: [(HyperdataContact -> Text)]
238 shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName) d
239 , \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
240 , \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
241 ]
242
243
244
245 maybeText :: Maybe Text -> Text
246 maybeText = maybe (DT.pack "") identity
247
248 ---------------------------------------------------------------------------