]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Node/Document/Insert.hs
[Flow] using user id. TODO : tests.
[gargantext.git] / src / Gargantext / Database / Node / Document / Insert.hs
1 {-|
2 Module : Gargantext.Database.Node.Document.Insert
3 Description : Importing context of texts (documents)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 * Purpose of this module
11
12 Enabling "common goods" of text data and respecting privacy.
13
14 Gargantext shares as "common good" the links between context of texts
15 and terms / words / ngrams.
16
17 Basically a context of text can be defined as a document (see 'Gargantext.Text').
18
19 Issue to tackle in that module: each global document of Gargantext has
20 to be unique, then shared, but how to respect privacy if needed ?
21
22
23 * Methodology to get uniqueness and privacy by design
24
25 As a consequence, when importing/inserting a new document in Gargantext,
26 a policy for the uniqueness of the inserted docuemnts has to be defined.
27
28 That is the purpose of this module which defines its main concepts.
29
30 Unique identifier in database is of a 3-tuple of 3 policies that
31 together define uniqueness:
32
33 - Design policy: type of node is needed as TypenameId, that is a
34 Document or Individual or something else;
35
36 - Privacy policy: with ParentId, parent becomes unique, then it enables
37 users to get their own copy without sharing it with all the users of the
38 database (in others words parent_id is necessary to preserve privacy for
39 instance).
40
41 - Hash policy: this UniqId is a sha256 uniq id which is the result of
42 the concatenation of the parameters defined by @shaParameters@.
43
44 > -- * Example
45 > insertTest :: FromRow r => CorpusId -> [Node HyperdataDocument] -> IO [r]
46 > insertTest :: IO [ReturnId]
47 > insertTest = runCmdDev $ insertDocuments 1 452162 hyperdataDocuments
48
49 -}
50 ------------------------------------------------------------------------
51 {-# LANGUAGE DeriveGeneric #-}
52 {-# LANGUAGE FlexibleContexts #-}
53 {-# LANGUAGE FlexibleInstances #-}
54 {-# LANGUAGE NoImplicitPrelude #-}
55 {-# LANGUAGE OverloadedStrings #-}
56 {-# LANGUAGE QuasiQuotes #-}
57 {-# LANGUAGE RankNTypes #-}
58 {-# LANGUAGE TypeSynonymInstances #-}
59 ------------------------------------------------------------------------
60 module Gargantext.Database.Node.Document.Insert where
61
62 import Control.Lens (set, view)
63 import Control.Lens.Prism
64 import Control.Lens.Cons
65 import Data.Aeson (toJSON)
66 import Data.Maybe (maybe)
67 import Data.Time.Segment (jour)
68 import Data.Text (Text)
69 import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
70 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
71 import Database.PostgreSQL.Simple.SqlQQ
72 import Database.PostgreSQL.Simple.ToField (toField, Action)
73 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
74 import GHC.Generics (Generic)
75 import Gargantext.Database.Config (nodeTypeId)
76 import Gargantext.Database.Utils (Cmd, runPGSQuery)
77 import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..))
78 import Gargantext.Database.Types.Node
79 import Gargantext.Prelude
80 import qualified Data.ByteString.Lazy.Char8 as DC (pack)
81 import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
82 import qualified Data.Text as DT (pack, unpack, concat, take)
83 import Gargantext.Prelude.Utils (sha)
84 -- TODO : the import of Document constructor below does not work
85 -- import Gargantext.Database.Types.Node (Document)
86 --import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
87 -- , hyperdataDocument_uniqId
88 -- , hyperdataDocument_title
89 -- , hyperdataDocument_abstract
90 -- , hyperdataDocument_source
91 -- , Node(..), node_typename
92 -- , node_userId
93 -- , node_parentId, node_name, node_hyperdata, hyperdataDocuments
94 -- , NodeTypeId
95 -- )
96 {-| To Print result query
97 import Data.ByteString.Internal (ByteString)
98 import Database.PostgreSQL.Simple (formatQuery)
99 -}
100
101 ---------------------------------------------------------------------------
102 -- * Main Insert functions
103
104 -- | Insert Document main function
105 -- UserId : user who is inserting the documents
106 -- ParentId : folder ID which is parent of the inserted documents
107 -- Administrator of the database has to create a uniq index as following SQL command:
108 -- `create unique index on nodes (typename, parent_id, (hyperdata ->> 'uniqId'));`
109 insertDb :: InsertDb a => UserId -> ParentId -> [a] -> Cmd err [ReturnId]
110 insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
111 where
112 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
113
114 class InsertDb a
115 where
116 insertDb' :: UserId -> ParentId -> a -> [Action]
117
118
119 instance InsertDb HyperdataDocument
120 where
121 insertDb' u p h = [ toField $ nodeTypeId NodeDocument
122 , toField u
123 , toField p
124 , toField $ maybe "No Title" (DT.take 255) (_hyperdataDocument_title h)
125 , toField $ _hyperdataDocument_publication_date h -- TODO USE UTCTime
126 , (toField . toJSON) h
127 ]
128
129 instance InsertDb HyperdataContact
130 where
131 insertDb' u p h = [ toField $ nodeTypeId NodeContact
132 , toField u
133 , toField p
134 , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
135 , toField $ jour 2010 1 1 -- TODO put default date
136 , (toField . toJSON) h
137 ]
138
139 -- | Debug SQL function
140 --
141 -- to print rendered query (Debug purpose) use @formatQuery@ function.
142 {-
143 insertDocuments_Debug :: (Hyperdata a, ToJSON a, ToRow a) => UserId -> ParentId -> [a] -> Cmd ByteString
144 insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fields inputData)
145 where
146 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
147 inputData = prepare uId pId hs
148 -}
149
150
151 -- | Input Tables: types of the tables
152 inputSqlTypes :: [Text]
153 inputSqlTypes = map DT.pack ["int4","int4","int4","text","date","jsonb"]
154
155 -- | SQL query to insert documents inside the database
156 queryInsert :: Query
157 queryInsert = [sql|
158 WITH input_rows(typename,user_id,parent_id,name,date,hyperdata) AS (?)
159 , ins AS (
160 INSERT INTO nodes (typename,user_id,parent_id,name,date,hyperdata)
161 SELECT * FROM input_rows
162 ON CONFLICT ((hyperdata ->> 'uniqIdBdd')) DO NOTHING -- on unique index
163 -- ON CONFLICT (typename, parent_id, (hyperdata ->> 'uniqId')) DO NOTHING -- on unique index
164 RETURNING id,hyperdata
165 )
166
167 SELECT true AS source -- true for 'newly inserted'
168 , id
169 , hyperdata ->> 'uniqId' as doi
170 FROM ins
171 UNION ALL
172 SELECT false AS source -- false for 'not inserted'
173 , c.id
174 , hyperdata ->> 'uniqId' as doi
175 FROM input_rows
176 JOIN nodes c USING (hyperdata); -- columns of unique index
177 |]
178
179 ------------------------------------------------------------------------
180 -- * Main Types used
181
182 -- ** Return Types
183
184 -- | When documents are inserted
185 -- ReturnType after insertion
186 data ReturnId = ReturnId { reInserted :: Bool -- if the document is inserted (True: is new, False: is not new)
187 , reId :: NodeId -- always return the id of the document (even new or not new)
188 -- this is the uniq id in the database
189 , reUniqId :: Text -- Hash Id with concatenation of sha parameters
190 } deriving (Show, Generic)
191
192 instance FromRow ReturnId where
193 fromRow = ReturnId <$> field <*> field <*> field
194
195 ---------------------------------------------------------------------------
196 -- * Uniqueness of document definition
197
198 class AddUniqId a
199 where
200 addUniqId :: a -> a
201
202 instance AddUniqId HyperdataDocument
203 where
204 addUniqId = addUniqIdsDoc
205 where
206 addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
207 addUniqIdsDoc doc = set hyperdataDocument_uniqIdBdd (Just shaBdd)
208 $ set hyperdataDocument_uniqId (Just shaUni) doc
209 where
210 shaUni = sha $ DT.concat $ map ($ doc) shaParametersDoc
211 shaBdd = sha $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hyperdataDocument_bdd d))] <> shaParametersDoc)
212
213 shaParametersDoc :: [(HyperdataDocument -> Text)]
214 shaParametersDoc = [ \d -> maybeText (_hyperdataDocument_title d)
215 , \d -> maybeText (_hyperdataDocument_abstract d)
216 , \d -> maybeText (_hyperdataDocument_source d)
217 , \d -> maybeText (_hyperdataDocument_publication_date d)
218 ]
219
220 ---------------------------------------------------------------------------
221 -- * Uniqueness of document definition
222 -- TODO factorize with above (use the function below for tests)
223
224 instance AddUniqId HyperdataContact
225 where
226 addUniqId = addUniqIdsContact
227
228 addUniqIdsContact :: HyperdataContact -> HyperdataContact
229 addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
230 $ set (hc_uniqId ) (Just shaUni) hc
231 where
232 shaUni = uniqId $ DT.concat $ map ($ hc) shaParametersContact
233 shaBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
234
235 uniqId :: Text -> Text
236 uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
237
238 -- | TODO add more shaparameters
239 shaParametersContact :: [(HyperdataContact -> Text)]
240 shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName) d
241 , \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
242 , \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
243 ]
244
245
246
247 maybeText :: Maybe Text -> Text
248 maybeText = maybe (DT.pack "") identity
249
250 ---------------------------------------------------------------------------