]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
[hyperdata] refactor code to add hyperdata graph metrics
[gargantext.git] / src / Gargantext / Database / Query / Table / Node / Document / Insert.hs
1 {-|
2 Module : Gargantext.Database.Node.Document.Insert
3 Description : Importing context of texts (documents)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 * Purpose of this module
11
12 Enabling "common goods" of text data and respecting privacy.
13
14 Gargantext shares as "common good" the links between context of texts
15 and terms / words / ngrams.
16
17 Basically a context of text can be defined as a document (see 'Gargantext.Text').
18
19 Issue to tackle in that module: each global document of Gargantext has
20 to be unique, then shared, but how to respect privacy if needed ?
21
22
23 * Methodology to get uniqueness and privacy by design
24
25 As a consequence, when importing/inserting a new document in Gargantext,
26 a policy for the uniqueness of the inserted docuemnts has to be defined.
27
28 That is the purpose of this module which defines its main concepts.
29
30 Unique identifier in database is of a 3-tuple of 3 policies that
31 together define uniqueness:
32
33 - Design policy: type of node is needed as TypenameId, that is a
34 Document or Individual or something else;
35
36 - Privacy policy: with ParentId, parent becomes unique, then it enables
37 users to get their own copy without sharing it with all the users of the
38 database (in others words parent_id is necessary to preserve privacy for
39 instance).
40
41 - Hash policy: this UniqId is a sha256 uniq id which is the result of
42 the concatenation of the parameters defined by @shaParameters@.
43
44 > -- * Example
45 > insertTest :: FromRow r => CorpusId -> [Node HyperdataDocument] -> IO [r]
46 > insertTest :: IO [ReturnId]
47 > insertTest = runCmdDev $ insertDocuments 1 452162 hyperdataDocuments
48
49 -}
50 ------------------------------------------------------------------------
51 {-# LANGUAGE DeriveGeneric #-}
52 {-# LANGUAGE FlexibleContexts #-}
53 {-# LANGUAGE FlexibleInstances #-}
54 {-# LANGUAGE NoImplicitPrelude #-}
55 {-# LANGUAGE OverloadedStrings #-}
56 {-# LANGUAGE QuasiQuotes #-}
57 {-# LANGUAGE RankNTypes #-}
58 {-# LANGUAGE TypeSynonymInstances #-}
59 ------------------------------------------------------------------------
60 module Gargantext.Database.Query.Table.Node.Document.Insert
61 where
62
63 import Control.Lens (set, view)
64 import Control.Lens.Cons
65 import Control.Lens.Prism
66 import Data.Aeson (toJSON)
67 import Data.Maybe (maybe)
68 import Data.Text (Text)
69 import Data.Time.Segment (jour)
70 import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
71 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
72 import Database.PostgreSQL.Simple.SqlQQ
73 import Database.PostgreSQL.Simple.ToField (toField, Action)
74 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
75 import GHC.Generics (Generic)
76 import qualified Data.ByteString.Lazy.Char8 as DC (pack)
77 import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
78 import qualified Data.Text as DT (pack, unpack, concat, take)
79
80 import Gargantext.Database.Query.Table.Node.Contact -- (HyperdataContact(..), ContactWho(..))
81 import Gargantext.Database.Admin.Config (nodeTypeId)
82 import Gargantext.Database.Admin.Types.Hyperdata
83 import Gargantext.Database.Admin.Types.Node
84 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
85 import Gargantext.Prelude
86 import Gargantext.Prelude.Utils (sha)
87
88 -- TODO : the import of Document constructor below does not work
89 -- import Gargantext.Database.Types.Node (Document)
90 --import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
91 -- , hyperdataDocument_uniqId
92 -- , hyperdataDocument_title
93 -- , hyperdataDocument_abstract
94 -- , hyperdataDocument_source
95 -- , Node(..), node_typename
96 -- , node_userId
97 -- , node_parentId, node_name, node_hyperdata, hyperdataDocuments
98 -- , NodeTypeId
99 -- )
100 {-| To Print result query
101 import Data.ByteString.Internal (ByteString)
102 import Database.PostgreSQL.Simple (formatQuery)
103 -}
104
105 ---------------------------------------------------------------------------
106 -- * Main Insert functions
107
108 -- | Insert Document main function
109 -- UserId : user who is inserting the documents
110 -- ParentId : folder ID which is parent of the inserted documents
111 -- Administrator of the database has to create a uniq index as following SQL command:
112 -- `create unique index on nodes (typename, parent_id, (hyperdata ->> 'uniqId'));`
113 insertDb :: InsertDb a => UserId -> ParentId -> [a] -> Cmd err [ReturnId]
114 insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
115 where
116 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
117
118 class InsertDb a
119 where
120 insertDb' :: UserId -> ParentId -> a -> [Action]
121
122
123 instance InsertDb HyperdataDocument
124 where
125 insertDb' u p h = [ toField $ nodeTypeId NodeDocument
126 , toField u
127 , toField p
128 , toField $ maybe "No Title" (DT.take 255) (_hyperdataDocument_title h)
129 , toField $ _hyperdataDocument_publication_date h -- TODO USE UTCTime
130 , (toField . toJSON) h
131 ]
132
133 instance InsertDb HyperdataContact
134 where
135 insertDb' u p h = [ toField $ nodeTypeId NodeContact
136 , toField u
137 , toField p
138 , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
139 , toField $ jour 2010 1 1 -- TODO put default date
140 , (toField . toJSON) h
141 ]
142
143 -- | Debug SQL function
144 --
145 -- to print rendered query (Debug purpose) use @formatQuery@ function.
146 {-
147 insertDocuments_Debug :: (Hyperdata a, ToJSON a, ToRow a) => UserId -> ParentId -> [a] -> Cmd ByteString
148 insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fields inputData)
149 where
150 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
151 inputData = prepare uId pId hs
152 -}
153
154
155 -- | Input Tables: types of the tables
156 inputSqlTypes :: [Text]
157 inputSqlTypes = map DT.pack ["int4","int4","int4","text","date","jsonb"]
158
159 -- | SQL query to insert documents inside the database
160 queryInsert :: Query
161 queryInsert = [sql|
162 WITH input_rows(typename,user_id,parent_id,name,date,hyperdata) AS (?)
163 , ins AS (
164 INSERT INTO nodes (typename,user_id,parent_id,name,date,hyperdata)
165 SELECT * FROM input_rows
166 ON CONFLICT ((hyperdata ->> 'uniqIdBdd')) DO NOTHING -- on unique index
167 -- ON CONFLICT (typename, parent_id, (hyperdata ->> 'uniqId')) DO NOTHING -- on unique index
168 RETURNING id,hyperdata
169 )
170
171 SELECT true AS source -- true for 'newly inserted'
172 , id
173 , hyperdata ->> 'uniqId' as doi
174 FROM ins
175 UNION ALL
176 SELECT false AS source -- false for 'not inserted'
177 , c.id
178 , hyperdata ->> 'uniqId' as doi
179 FROM input_rows
180 JOIN nodes c USING (hyperdata); -- columns of unique index
181 |]
182
183 ------------------------------------------------------------------------
184 -- * Main Types used
185
186 -- ** Return Types
187
188 -- | When documents are inserted
189 -- ReturnType after insertion
190 data ReturnId = ReturnId { reInserted :: Bool -- if the document is inserted (True: is new, False: is not new)
191 , reId :: NodeId -- always return the id of the document (even new or not new)
192 -- this is the uniq id in the database
193 , reUniqId :: Text -- Hash Id with concatenation of sha parameters
194 } deriving (Show, Generic)
195
196 instance FromRow ReturnId where
197 fromRow = ReturnId <$> field <*> field <*> field
198
199 ---------------------------------------------------------------------------
200 -- * Uniqueness of document definition
201
202 class AddUniqId a
203 where
204 addUniqId :: a -> a
205
206 instance AddUniqId HyperdataDocument
207 where
208 addUniqId = addUniqIdsDoc
209 where
210 addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
211 addUniqIdsDoc doc = set hyperdataDocument_uniqIdBdd (Just shaBdd)
212 $ set hyperdataDocument_uniqId (Just shaUni) doc
213 where
214 shaUni = sha $ DT.concat $ map ($ doc) shaParametersDoc
215 shaBdd = sha $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hyperdataDocument_bdd d))] <> shaParametersDoc)
216
217 shaParametersDoc :: [(HyperdataDocument -> Text)]
218 shaParametersDoc = [ \d -> maybeText (_hyperdataDocument_title d)
219 , \d -> maybeText (_hyperdataDocument_abstract d)
220 , \d -> maybeText (_hyperdataDocument_source d)
221 , \d -> maybeText (_hyperdataDocument_publication_date d)
222 ]
223
224 ---------------------------------------------------------------------------
225 -- * Uniqueness of document definition
226 -- TODO factorize with above (use the function below for tests)
227
228 instance AddUniqId HyperdataContact
229 where
230 addUniqId = addUniqIdsContact
231
232 addUniqIdsContact :: HyperdataContact -> HyperdataContact
233 addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
234 $ set (hc_uniqId ) (Just shaUni) hc
235 where
236 shaUni = uniqId $ DT.concat $ map ($ hc) shaParametersContact
237 shaBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
238
239 uniqId :: Text -> Text
240 uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
241
242 -- | TODO add more shaparameters
243 shaParametersContact :: [(HyperdataContact -> Text)]
244 shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName) d
245 , \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
246 , \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
247 ]
248
249
250
251 maybeText :: Maybe Text -> Text
252 maybeText = maybe (DT.pack "") identity
253
254 ---------------------------------------------------------------------------