]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
Merge branch 'dev' into dev-doc-table-optimization
[gargantext.git] / src / Gargantext / Database / Query / Table / Node / Document / Insert.hs
1 {-|
2 Module : Gargantext.Database.Node.Document.Insert
3 Description : Importing context of texts (documents)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 * Purpose of this module
11
12 Enabling "common goods" of text data and respecting privacy.
13
14 Gargantext shares as "common good" the links between context of texts
15 and terms / words / ngrams.
16
17 Basically a context of text can be defined as a document (see 'Gargantext.Text').
18
19 Issue to tackle in that module: each global document of Gargantext has
20 to be unique, then shared, but how to respect privacy if needed ?
21
22
23 * Methodology to get uniqueness and privacy by design
24
25 As a consequence, when importing/inserting a new document in Gargantext,
26 a policy for the uniqueness of the inserted docuemnts has to be defined.
27
28 That is the purpose of this module which defines its main concepts.
29
30 Unique identifier in database is of a 3-tuple of 3 policies that
31 together define uniqueness:
32
33 - Design policy: type of node is needed as TypenameId, that is a
34 Document or Individual or something else;
35
36 - Privacy policy: with ParentId, parent becomes unique, then it enables
37 users to get their own copy without sharing it with all the users of the
38 database (in others words parent_id is necessary to preserve privacy for
39 instance).
40
41 - Hash policy: this UniqId is a sha256 uniq id which is the result of
42 the concatenation of the parameters defined by @shaParameters@.
43
44 > -- * Example
45 > insertTest :: FromRow r => CorpusId -> [Node HyperdataDocument] -> IO [r]
46 > insertTest :: IO [ReturnId]
47 > insertTest = runCmdDev $ insertDocuments 1 452162 hyperdataDocuments
48
49 -}
50 ------------------------------------------------------------------------
51 {-# LANGUAGE QuasiQuotes #-}
52 {-# LANGUAGE TypeSynonymInstances #-}
53 ------------------------------------------------------------------------
54 module Gargantext.Database.Query.Table.Node.Document.Insert
55 where
56
57 import Control.Lens (set, view)
58 import Control.Lens.Cons
59 import Control.Lens.Prism
60 import Data.Aeson (toJSON)
61 import Data.Maybe (maybe)
62 import Data.Text (Text)
63 import Data.Time.Segment (jour)
64 import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
65 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
66 import Database.PostgreSQL.Simple.SqlQQ
67 import Database.PostgreSQL.Simple.ToField (toField, Action)
68 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
69 import GHC.Generics (Generic)
70 import Gargantext.Database.Admin.Config (nodeTypeId)
71 import Gargantext.Database.Admin.Types.Hyperdata
72 import Gargantext.Database.Admin.Types.Node
73 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
74 import Gargantext.Prelude
75 import Gargantext.Prelude.Utils (hash)
76 import qualified Data.Text as DT (pack, concat, take)
77
78 -- TODO : the import of Document constructor below does not work
79 -- import Gargantext.Database.Types.Node (Document)
80 --import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
81 -- , hyperdataDocument_uniqId
82 -- , hyperdataDocument_title
83 -- , hyperdataDocument_abstract
84 -- , hyperdataDocument_source
85 -- , Node(..), node_typename
86 -- , node_userId
87 -- , node_parentId, node_name, node_hyperdata, hyperdataDocuments
88 -- , NodeTypeId
89 -- )
90 {-| To Print result query
91 import Data.ByteString.Internal (ByteString)
92 import Database.PostgreSQL.Simple (formatQuery)
93 -}
94
95 ---------------------------------------------------------------------------
96 -- * Main Insert functions
97
98 -- | Insert Document main function
99 -- UserId : user who is inserting the documents
100 -- ParentId : folder ID which is parent of the inserted documents
101 -- Administrator of the database has to create a uniq index as following SQL command:
102 -- `create unique index on nodes (typename, parent_id, (hyperdata ->> 'uniqId'));`
103 insertDb :: InsertDb a => UserId -> ParentId -> [a] -> Cmd err [ReturnId]
104 insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
105 where
106 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
107
108 class InsertDb a
109 where
110 insertDb' :: UserId -> ParentId -> a -> [Action]
111
112
113 instance InsertDb HyperdataDocument
114 where
115 insertDb' u p h = [ toField $ nodeTypeId NodeDocument
116 , toField u
117 , toField p
118 , toField $ maybe "No Title" (DT.take 255) (_hd_title h)
119 , toField $ _hd_publication_date h -- TODO USE UTCTime
120 , (toField . toJSON) h
121 ]
122
123 instance InsertDb HyperdataContact
124 where
125 insertDb' u p h = [ toField $ nodeTypeId NodeContact
126 , toField u
127 , toField p
128 , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
129 , toField $ jour 2010 1 1 -- TODO put default date
130 , (toField . toJSON) h
131 ]
132
133 -- | Debug SQL function
134 --
135 -- to print rendered query (Debug purpose) use @formatQuery@ function.
136 {-
137 insertDocuments_Debug :: (Hyperdata a, ToJSON a, ToRow a) => UserId -> ParentId -> [a] -> Cmd ByteString
138 insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fields inputData)
139 where
140 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
141 inputData = prepare uId pId hs
142 -}
143
144
145 -- | Input Tables: types of the tables
146 inputSqlTypes :: [Text]
147 inputSqlTypes = map DT.pack ["int4","int4","int4","text","date","jsonb"]
148
149 -- | SQL query to insert documents inside the database
150 queryInsert :: Query
151 queryInsert = [sql|
152 WITH input_rows(typename,user_id,parent_id,name,date,hyperdata) AS (?)
153 , ins AS (
154 INSERT INTO nodes (typename,user_id,parent_id,name,date,hyperdata)
155 SELECT * FROM input_rows
156 ON CONFLICT ((hyperdata ->> 'uniqIdBdd')) DO NOTHING -- on unique index
157 -- ON CONFLICT (typename, parent_id, (hyperdata ->> 'uniqId')) DO NOTHING -- on unique index
158 RETURNING id,hyperdata
159 )
160
161 SELECT true AS source -- true for 'newly inserted'
162 , id
163 , hyperdata ->> 'uniqId' as doi
164 FROM ins
165 UNION ALL
166 SELECT false AS source -- false for 'not inserted'
167 , c.id
168 , hyperdata ->> 'uniqId' as doi
169 FROM input_rows
170 JOIN nodes c USING (hyperdata); -- columns of unique index
171 |]
172
173 ------------------------------------------------------------------------
174 -- * Main Types used
175
176 -- ** Return Types
177
178 -- | When documents are inserted
179 -- ReturnType after insertion
180 data ReturnId = ReturnId { reInserted :: Bool -- if the document is inserted (True: is new, False: is not new)
181 , reId :: NodeId -- always return the id of the document (even new or not new)
182 -- this is the uniq id in the database
183 , reUniqId :: Text -- Hash Id with concatenation of sha parameters
184 } deriving (Show, Generic)
185
186 instance FromRow ReturnId where
187 fromRow = ReturnId <$> field <*> field <*> field
188
189 ---------------------------------------------------------------------------
190 -- * Uniqueness of document definition
191
192 class AddUniqId a
193 where
194 addUniqId :: a -> a
195
196 instance AddUniqId HyperdataDocument
197 where
198 addUniqId = addUniqIdsDoc
199 where
200 addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
201 addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
202 $ set hd_uniqId (Just shaUni) doc
203 where
204 shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
205 shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
206
207 shaParametersDoc :: [(HyperdataDocument -> Text)]
208 shaParametersDoc = [ \d -> maybeText (_hd_title d)
209 , \d -> maybeText (_hd_abstract d)
210 , \d -> maybeText (_hd_source d)
211 , \d -> maybeText (_hd_publication_date d)
212 ]
213
214 ---------------------------------------------------------------------------
215 -- * Uniqueness of document definition
216 -- TODO factorize with above (use the function below for tests)
217
218 instance AddUniqId HyperdataContact
219 where
220 addUniqId = addUniqIdsContact
221
222 addUniqIdsContact :: HyperdataContact -> HyperdataContact
223 addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
224 $ set (hc_uniqId ) (Just shaUni) hc
225 where
226 shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact
227 shaBdd = hash $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
228
229 -- | TODO add more shaparameters
230 shaParametersContact :: [(HyperdataContact -> Text)]
231 shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName) d
232 , \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
233 , \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
234 ]
235
236
237
238 maybeText :: Maybe Text -> Text
239 maybeText = maybe (DT.pack "") identity
240
241 ---------------------------------------------------------------------------