]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
[REFACT] clean Hyperdatas
[gargantext.git] / src / Gargantext / Database / Query / Table / Node / Document / Insert.hs
1 {-|
2 Module : Gargantext.Database.Node.Document.Insert
3 Description : Importing context of texts (documents)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 * Purpose of this module
11
12 Enabling "common goods" of text data and respecting privacy.
13
14 Gargantext shares as "common good" the links between context of texts
15 and terms / words / ngrams.
16
17 Basically a context of text can be defined as a document (see 'Gargantext.Text').
18
19 Issue to tackle in that module: each global document of Gargantext has
20 to be unique, then shared, but how to respect privacy if needed ?
21
22
23 * Methodology to get uniqueness and privacy by design
24
25 As a consequence, when importing/inserting a new document in Gargantext,
26 a policy for the uniqueness of the inserted docuemnts has to be defined.
27
28 That is the purpose of this module which defines its main concepts.
29
30 Unique identifier in database is of a 3-tuple of 3 policies that
31 together define uniqueness:
32
33 - Design policy: type of node is needed as TypenameId, that is a
34 Document or Individual or something else;
35
36 - Privacy policy: with ParentId, parent becomes unique, then it enables
37 users to get their own copy without sharing it with all the users of the
38 database (in others words parent_id is necessary to preserve privacy for
39 instance).
40
41 - Hash policy: this UniqId is a sha256 uniq id which is the result of
42 the concatenation of the parameters defined by @shaParameters@.
43
44 > -- * Example
45 > insertTest :: FromRow r => CorpusId -> [Node HyperdataDocument] -> IO [r]
46 > insertTest :: IO [ReturnId]
47 > insertTest = runCmdDev $ insertDocuments 1 452162 hyperdataDocuments
48
49 -}
50 ------------------------------------------------------------------------
51 {-# LANGUAGE QuasiQuotes #-}
52 {-# LANGUAGE TypeSynonymInstances #-}
53 ------------------------------------------------------------------------
54 module Gargantext.Database.Query.Table.Node.Document.Insert
55 where
56
57 import Control.Lens (set, view)
58 import Control.Lens.Cons
59 import Control.Lens.Prism
60 import Data.Aeson (toJSON)
61 import Data.Maybe (maybe)
62 import Data.Text (Text)
63 import Data.Time.Segment (jour)
64 import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
65 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
66 import Database.PostgreSQL.Simple.SqlQQ
67 import Database.PostgreSQL.Simple.ToField (toField, Action)
68 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
69 import GHC.Generics (Generic)
70 import Gargantext.Database.Admin.Config (nodeTypeId)
71 import Gargantext.Database.Admin.Types.Hyperdata
72 import Gargantext.Database.Admin.Types.Node
73 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
74 import Gargantext.Prelude
75 import Gargantext.Prelude.Utils (sha)
76 import qualified Data.ByteString.Lazy.Char8 as DC (pack)
77 import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
78 import qualified Data.Text as DT (pack, unpack, concat, take)
79
80 -- TODO : the import of Document constructor below does not work
81 -- import Gargantext.Database.Types.Node (Document)
82 --import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
83 -- , hyperdataDocument_uniqId
84 -- , hyperdataDocument_title
85 -- , hyperdataDocument_abstract
86 -- , hyperdataDocument_source
87 -- , Node(..), node_typename
88 -- , node_userId
89 -- , node_parentId, node_name, node_hyperdata, hyperdataDocuments
90 -- , NodeTypeId
91 -- )
92 {-| To Print result query
93 import Data.ByteString.Internal (ByteString)
94 import Database.PostgreSQL.Simple (formatQuery)
95 -}
96
97 ---------------------------------------------------------------------------
98 -- * Main Insert functions
99
100 -- | Insert Document main function
101 -- UserId : user who is inserting the documents
102 -- ParentId : folder ID which is parent of the inserted documents
103 -- Administrator of the database has to create a uniq index as following SQL command:
104 -- `create unique index on nodes (typename, parent_id, (hyperdata ->> 'uniqId'));`
105 insertDb :: InsertDb a => UserId -> ParentId -> [a] -> Cmd err [ReturnId]
106 insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
107 where
108 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
109
110 class InsertDb a
111 where
112 insertDb' :: UserId -> ParentId -> a -> [Action]
113
114
115 instance InsertDb HyperdataDocument
116 where
117 insertDb' u p h = [ toField $ nodeTypeId NodeDocument
118 , toField u
119 , toField p
120 , toField $ maybe "No Title" (DT.take 255) (_hd_title h)
121 , toField $ _hd_publication_date h -- TODO USE UTCTime
122 , (toField . toJSON) h
123 ]
124
125 instance InsertDb HyperdataContact
126 where
127 insertDb' u p h = [ toField $ nodeTypeId NodeContact
128 , toField u
129 , toField p
130 , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
131 , toField $ jour 2010 1 1 -- TODO put default date
132 , (toField . toJSON) h
133 ]
134
135 -- | Debug SQL function
136 --
137 -- to print rendered query (Debug purpose) use @formatQuery@ function.
138 {-
139 insertDocuments_Debug :: (Hyperdata a, ToJSON a, ToRow a) => UserId -> ParentId -> [a] -> Cmd ByteString
140 insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fields inputData)
141 where
142 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
143 inputData = prepare uId pId hs
144 -}
145
146
147 -- | Input Tables: types of the tables
148 inputSqlTypes :: [Text]
149 inputSqlTypes = map DT.pack ["int4","int4","int4","text","date","jsonb"]
150
151 -- | SQL query to insert documents inside the database
152 queryInsert :: Query
153 queryInsert = [sql|
154 WITH input_rows(typename,user_id,parent_id,name,date,hyperdata) AS (?)
155 , ins AS (
156 INSERT INTO nodes (typename,user_id,parent_id,name,date,hyperdata)
157 SELECT * FROM input_rows
158 ON CONFLICT ((hyperdata ->> 'uniqIdBdd')) DO NOTHING -- on unique index
159 -- ON CONFLICT (typename, parent_id, (hyperdata ->> 'uniqId')) DO NOTHING -- on unique index
160 RETURNING id,hyperdata
161 )
162
163 SELECT true AS source -- true for 'newly inserted'
164 , id
165 , hyperdata ->> 'uniqId' as doi
166 FROM ins
167 UNION ALL
168 SELECT false AS source -- false for 'not inserted'
169 , c.id
170 , hyperdata ->> 'uniqId' as doi
171 FROM input_rows
172 JOIN nodes c USING (hyperdata); -- columns of unique index
173 |]
174
175 ------------------------------------------------------------------------
176 -- * Main Types used
177
178 -- ** Return Types
179
180 -- | When documents are inserted
181 -- ReturnType after insertion
182 data ReturnId = ReturnId { reInserted :: Bool -- if the document is inserted (True: is new, False: is not new)
183 , reId :: NodeId -- always return the id of the document (even new or not new)
184 -- this is the uniq id in the database
185 , reUniqId :: Text -- Hash Id with concatenation of sha parameters
186 } deriving (Show, Generic)
187
188 instance FromRow ReturnId where
189 fromRow = ReturnId <$> field <*> field <*> field
190
191 ---------------------------------------------------------------------------
192 -- * Uniqueness of document definition
193
194 class AddUniqId a
195 where
196 addUniqId :: a -> a
197
198 instance AddUniqId HyperdataDocument
199 where
200 addUniqId = addUniqIdsDoc
201 where
202 addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
203 addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
204 $ set hd_uniqId (Just shaUni) doc
205 where
206 shaUni = sha $ DT.concat $ map ($ doc) shaParametersDoc
207 shaBdd = sha $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
208
209 shaParametersDoc :: [(HyperdataDocument -> Text)]
210 shaParametersDoc = [ \d -> maybeText (_hd_title d)
211 , \d -> maybeText (_hd_abstract d)
212 , \d -> maybeText (_hd_source d)
213 , \d -> maybeText (_hd_publication_date d)
214 ]
215
216 ---------------------------------------------------------------------------
217 -- * Uniqueness of document definition
218 -- TODO factorize with above (use the function below for tests)
219
220 instance AddUniqId HyperdataContact
221 where
222 addUniqId = addUniqIdsContact
223
224 addUniqIdsContact :: HyperdataContact -> HyperdataContact
225 addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
226 $ set (hc_uniqId ) (Just shaUni) hc
227 where
228 shaUni = uniqId $ DT.concat $ map ($ hc) shaParametersContact
229 shaBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
230
231 uniqId :: Text -> Text
232 uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
233
234 -- | TODO add more shaparameters
235 shaParametersContact :: [(HyperdataContact -> Text)]
236 shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName) d
237 , \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
238 , \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
239 ]
240
241
242
243 maybeText :: Maybe Text -> Text
244 maybeText = maybe (DT.pack "") identity
245
246 ---------------------------------------------------------------------------