]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Query/Node/Document/Insert.hs
[DB/REFACT] intermediary step
[gargantext.git] / src / Gargantext / Database / Action / Query / Node / Document / Insert.hs
1 {-|
2 Module : Gargantext.Database.Node.Document.Insert
3 Description : Importing context of texts (documents)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 * Purpose of this module
11
12 Enabling "common goods" of text data and respecting privacy.
13
14 Gargantext shares as "common good" the links between context of texts
15 and terms / words / ngrams.
16
17 Basically a context of text can be defined as a document (see 'Gargantext.Text').
18
19 Issue to tackle in that module: each global document of Gargantext has
20 to be unique, then shared, but how to respect privacy if needed ?
21
22
23 * Methodology to get uniqueness and privacy by design
24
25 As a consequence, when importing/inserting a new document in Gargantext,
26 a policy for the uniqueness of the inserted docuemnts has to be defined.
27
28 That is the purpose of this module which defines its main concepts.
29
30 Unique identifier in database is of a 3-tuple of 3 policies that
31 together define uniqueness:
32
33 - Design policy: type of node is needed as TypenameId, that is a
34 Document or Individual or something else;
35
36 - Privacy policy: with ParentId, parent becomes unique, then it enables
37 users to get their own copy without sharing it with all the users of the
38 database (in others words parent_id is necessary to preserve privacy for
39 instance).
40
41 - Hash policy: this UniqId is a sha256 uniq id which is the result of
42 the concatenation of the parameters defined by @shaParameters@.
43
44 > -- * Example
45 > insertTest :: FromRow r => CorpusId -> [Node HyperdataDocument] -> IO [r]
46 > insertTest :: IO [ReturnId]
47 > insertTest = runCmdDev $ insertDocuments 1 452162 hyperdataDocuments
48
49 -}
50 ------------------------------------------------------------------------
51 {-# LANGUAGE DeriveGeneric #-}
52 {-# LANGUAGE FlexibleContexts #-}
53 {-# LANGUAGE FlexibleInstances #-}
54 {-# LANGUAGE NoImplicitPrelude #-}
55 {-# LANGUAGE OverloadedStrings #-}
56 {-# LANGUAGE QuasiQuotes #-}
57 {-# LANGUAGE RankNTypes #-}
58 {-# LANGUAGE TypeSynonymInstances #-}
59 ------------------------------------------------------------------------
60 module Gargantext.Database.Action.Query.Node.Document.Insert
61 where
62
63 import Control.Lens (set, view)
64 import Control.Lens.Cons
65 import Control.Lens.Prism
66 import Data.Aeson (toJSON)
67 import Data.Maybe (maybe)
68 import Data.Text (Text)
69 import Data.Time.Segment (jour)
70 import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
71 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
72 import Database.PostgreSQL.Simple.SqlQQ
73 import Database.PostgreSQL.Simple.ToField (toField, Action)
74 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
75 import GHC.Generics (Generic)
76 import Gargantext.Database.Action.Query.Node.Contact -- (HyperdataContact(..), ContactWho(..))
77 import Gargantext.Database.Admin.Config (nodeTypeId)
78 import Gargantext.Database.Admin.Types.Node
79 import Gargantext.Database.Admin.Utils (Cmd, runPGSQuery)
80 import Gargantext.Prelude
81 import Gargantext.Prelude.Utils (sha)
82 import qualified Data.ByteString.Lazy.Char8 as DC (pack)
83 import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
84 import qualified Data.Text as DT (pack, unpack, concat, take)
85
86 -- TODO : the import of Document constructor below does not work
87 -- import Gargantext.Database.Types.Node (Document)
88 --import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
89 -- , hyperdataDocument_uniqId
90 -- , hyperdataDocument_title
91 -- , hyperdataDocument_abstract
92 -- , hyperdataDocument_source
93 -- , Node(..), node_typename
94 -- , node_userId
95 -- , node_parentId, node_name, node_hyperdata, hyperdataDocuments
96 -- , NodeTypeId
97 -- )
98 {-| To Print result query
99 import Data.ByteString.Internal (ByteString)
100 import Database.PostgreSQL.Simple (formatQuery)
101 -}
102
103 ---------------------------------------------------------------------------
104 -- * Main Insert functions
105
106 -- | Insert Document main function
107 -- UserId : user who is inserting the documents
108 -- ParentId : folder ID which is parent of the inserted documents
109 -- Administrator of the database has to create a uniq index as following SQL command:
110 -- `create unique index on nodes (typename, parent_id, (hyperdata ->> 'uniqId'));`
111 insertDb :: InsertDb a => UserId -> ParentId -> [a] -> Cmd err [ReturnId]
112 insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
113 where
114 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
115
116 class InsertDb a
117 where
118 insertDb' :: UserId -> ParentId -> a -> [Action]
119
120
121 instance InsertDb HyperdataDocument
122 where
123 insertDb' u p h = [ toField $ nodeTypeId NodeDocument
124 , toField u
125 , toField p
126 , toField $ maybe "No Title" (DT.take 255) (_hyperdataDocument_title h)
127 , toField $ _hyperdataDocument_publication_date h -- TODO USE UTCTime
128 , (toField . toJSON) h
129 ]
130
131 instance InsertDb HyperdataContact
132 where
133 insertDb' u p h = [ toField $ nodeTypeId NodeContact
134 , toField u
135 , toField p
136 , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
137 , toField $ jour 2010 1 1 -- TODO put default date
138 , (toField . toJSON) h
139 ]
140
141 -- | Debug SQL function
142 --
143 -- to print rendered query (Debug purpose) use @formatQuery@ function.
144 {-
145 insertDocuments_Debug :: (Hyperdata a, ToJSON a, ToRow a) => UserId -> ParentId -> [a] -> Cmd ByteString
146 insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fields inputData)
147 where
148 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
149 inputData = prepare uId pId hs
150 -}
151
152
153 -- | Input Tables: types of the tables
154 inputSqlTypes :: [Text]
155 inputSqlTypes = map DT.pack ["int4","int4","int4","text","date","jsonb"]
156
157 -- | SQL query to insert documents inside the database
158 queryInsert :: Query
159 queryInsert = [sql|
160 WITH input_rows(typename,user_id,parent_id,name,date,hyperdata) AS (?)
161 , ins AS (
162 INSERT INTO nodes (typename,user_id,parent_id,name,date,hyperdata)
163 SELECT * FROM input_rows
164 ON CONFLICT ((hyperdata ->> 'uniqIdBdd')) DO NOTHING -- on unique index
165 -- ON CONFLICT (typename, parent_id, (hyperdata ->> 'uniqId')) DO NOTHING -- on unique index
166 RETURNING id,hyperdata
167 )
168
169 SELECT true AS source -- true for 'newly inserted'
170 , id
171 , hyperdata ->> 'uniqId' as doi
172 FROM ins
173 UNION ALL
174 SELECT false AS source -- false for 'not inserted'
175 , c.id
176 , hyperdata ->> 'uniqId' as doi
177 FROM input_rows
178 JOIN nodes c USING (hyperdata); -- columns of unique index
179 |]
180
181 ------------------------------------------------------------------------
182 -- * Main Types used
183
184 -- ** Return Types
185
186 -- | When documents are inserted
187 -- ReturnType after insertion
188 data ReturnId = ReturnId { reInserted :: Bool -- if the document is inserted (True: is new, False: is not new)
189 , reId :: NodeId -- always return the id of the document (even new or not new)
190 -- this is the uniq id in the database
191 , reUniqId :: Text -- Hash Id with concatenation of sha parameters
192 } deriving (Show, Generic)
193
194 instance FromRow ReturnId where
195 fromRow = ReturnId <$> field <*> field <*> field
196
197 ---------------------------------------------------------------------------
198 -- * Uniqueness of document definition
199
200 class AddUniqId a
201 where
202 addUniqId :: a -> a
203
204 instance AddUniqId HyperdataDocument
205 where
206 addUniqId = addUniqIdsDoc
207 where
208 addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
209 addUniqIdsDoc doc = set hyperdataDocument_uniqIdBdd (Just shaBdd)
210 $ set hyperdataDocument_uniqId (Just shaUni) doc
211 where
212 shaUni = sha $ DT.concat $ map ($ doc) shaParametersDoc
213 shaBdd = sha $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hyperdataDocument_bdd d))] <> shaParametersDoc)
214
215 shaParametersDoc :: [(HyperdataDocument -> Text)]
216 shaParametersDoc = [ \d -> maybeText (_hyperdataDocument_title d)
217 , \d -> maybeText (_hyperdataDocument_abstract d)
218 , \d -> maybeText (_hyperdataDocument_source d)
219 , \d -> maybeText (_hyperdataDocument_publication_date d)
220 ]
221
222 ---------------------------------------------------------------------------
223 -- * Uniqueness of document definition
224 -- TODO factorize with above (use the function below for tests)
225
226 instance AddUniqId HyperdataContact
227 where
228 addUniqId = addUniqIdsContact
229
230 addUniqIdsContact :: HyperdataContact -> HyperdataContact
231 addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
232 $ set (hc_uniqId ) (Just shaUni) hc
233 where
234 shaUni = uniqId $ DT.concat $ map ($ hc) shaParametersContact
235 shaBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
236
237 uniqId :: Text -> Text
238 uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
239
240 -- | TODO add more shaparameters
241 shaParametersContact :: [(HyperdataContact -> Text)]
242 shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName) d
243 , \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
244 , \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
245 ]
246
247
248
249 maybeText :: Maybe Text -> Text
250 maybeText = maybe (DT.pack "") identity
251
252 ---------------------------------------------------------------------------