]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Node/Document/Insert.hs
[API][DB] Pairing tools: get pairs and pairWith.
[gargantext.git] / src / Gargantext / Database / Node / Document / Insert.hs
1 {-|
2 Module : Gargantext.Database.Node.Document.Insert
3 Description : Importing context of texts (documents)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 * Purpose of this module
11
12 Enabling "common goods" of text data and respecting privacy.
13
14 Gargantext shares as "common good" the links between context of texts
15 and terms / words / ngrams.
16
17 Basically a context of text can be defined as a document (see 'Gargantext.Text').
18
19 Issue to tackle in that module: each global document of Gargantext has
20 to be unique, then shared, but how to respect privacy if needed ?
21
22
23 * Methodology to get uniqueness and privacy by design
24
25 As a consequence, when importing/inserting a new document in Gargantext,
26 a policy for the uniqueness of the inserted docuemnts has to be defined.
27
28 That is the purpose of this module which defines its main concepts.
29
30 Unique identifier in database is of a 3-tuple of 3 policies that
31 together define uniqueness:
32
33 - Design policy: type of node is needed as TypenameId, that is a
34 Document or Individual or something else;
35
36 - Privacy policy: with ParentId, parent becomes unique, then it enables
37 users to get their own copy without sharing it with all the users of the
38 database (in others words parent_id is necessary to preserve privacy for
39 instance).
40
41 - Hash policy: this UniqId is a sha256 uniq id which is the result of
42 the concatenation of the parameters defined by @shaParameters@.
43
44 > -- * Example
45 > insertTest :: FromRow r => CorpusId -> [Node HyperdataDocument] -> IO [r]
46 > insertTest :: IO [ReturnId]
47 > insertTest = runCmdDev $ insertDocuments 1 452162 hyperdataDocuments
48
49 -}
50 ------------------------------------------------------------------------
51 {-# LANGUAGE DeriveGeneric #-}
52 {-# LANGUAGE FlexibleInstances #-}
53 {-# LANGUAGE NoImplicitPrelude #-}
54 {-# LANGUAGE OverloadedStrings #-}
55 {-# LANGUAGE QuasiQuotes #-}
56 {-# LANGUAGE RankNTypes #-}
57 {-# LANGUAGE TypeSynonymInstances #-}
58 ------------------------------------------------------------------------
59 module Gargantext.Database.Node.Document.Insert where
60
61 import Control.Lens (set, view)
62 import Control.Lens.Prism
63 import Control.Lens.Cons
64 import Data.Aeson (toJSON)
65 import Data.Maybe (maybe)
66 import Data.Time.Segment (jour)
67 import Data.Text (Text)
68 import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
69 import Database.PostgreSQL.Simple.FromRow (fromRow, field)
70 import Database.PostgreSQL.Simple.SqlQQ
71 import Database.PostgreSQL.Simple.ToField (toField, Action)
72 import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
73 import GHC.Generics (Generic)
74 import Gargantext.Database.Config (nodeTypeId)
75 import Gargantext.Database.Utils (Cmd, runPGSQuery)
76 import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..))
77 import Gargantext.Database.Types.Node
78 import Gargantext.Prelude
79 import qualified Data.ByteString.Lazy.Char8 as DC (pack)
80 import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
81 import qualified Data.Text as DT (pack, unpack, concat, take)
82 import Gargantext.Prelude.Utils (sha)
83 -- TODO : the import of Document constructor below does not work
84 -- import Gargantext.Database.Types.Node (Document)
85 --import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
86 -- , hyperdataDocument_uniqId
87 -- , hyperdataDocument_title
88 -- , hyperdataDocument_abstract
89 -- , hyperdataDocument_source
90 -- , Node(..), node_typename
91 -- , node_userId
92 -- , node_parentId, node_name, node_hyperdata, hyperdataDocuments
93 -- , NodeTypeId
94 -- )
95 {-| To Print result query
96 import Data.ByteString.Internal (ByteString)
97 import Database.PostgreSQL.Simple (formatQuery)
98 -}
99
100 ---------------------------------------------------------------------------
101 -- * Main Insert functions
102
103 -- | Insert Document main function
104 -- UserId : user who is inserting the documents
105 -- ParentId : folder ID which is parent of the inserted documents
106 -- Administrator of the database has to create a uniq index as following SQL command:
107 -- `create unique index on nodes (typename, parent_id, (hyperdata ->> 'uniqId'));`
108 insertDb :: InsertDb a => UserId -> ParentId -> [a] -> Cmd err [ReturnId]
109 insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
110 where
111 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
112
113 class InsertDb a
114 where
115 insertDb' :: UserId -> ParentId -> a -> [Action]
116
117
118 instance InsertDb HyperdataDocument
119 where
120 insertDb' u p h = [ toField $ nodeTypeId NodeDocument
121 , toField u
122 , toField p
123 , toField $ maybe "No Title" (DT.take 255) (_hyperdataDocument_title h)
124 , toField $ _hyperdataDocument_publication_date h -- TODO USE UTCTime
125 , (toField . toJSON) h
126 ]
127
128 instance InsertDb HyperdataContact
129 where
130 insertDb' u p h = [ toField $ nodeTypeId NodeContact
131 , toField u
132 , toField p
133 , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
134 , toField $ jour 2010 1 1 -- TODO put default date
135 , (toField . toJSON) h
136 ]
137
138 -- | Debug SQL function
139 --
140 -- to print rendered query (Debug purpose) use @formatQuery@ function.
141 {-
142 insertDocuments_Debug :: (Hyperdata a, ToJSON a, ToRow a) => UserId -> ParentId -> [a] -> Cmd ByteString
143 insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fields inputData)
144 where
145 fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
146 inputData = prepare uId pId hs
147 -}
148
149
150 -- | Input Tables: types of the tables
151 inputSqlTypes :: [Text]
152 inputSqlTypes = map DT.pack ["int4","int4","int4","text","date","jsonb"]
153
154 -- | SQL query to insert documents inside the database
155 queryInsert :: Query
156 queryInsert = [sql|
157 WITH input_rows(typename,user_id,parent_id,name,date,hyperdata) AS (?)
158 , ins AS (
159 INSERT INTO nodes (typename,user_id,parent_id,name,date,hyperdata)
160 SELECT * FROM input_rows
161 ON CONFLICT ((hyperdata ->> 'uniqIdBdd')) DO NOTHING -- on unique index
162 -- ON CONFLICT (typename, parent_id, (hyperdata ->> 'uniqId')) DO NOTHING -- on unique index
163 RETURNING id,hyperdata
164 )
165
166 SELECT true AS source -- true for 'newly inserted'
167 , id
168 , hyperdata ->> 'uniqId' as doi
169 FROM ins
170 UNION ALL
171 SELECT false AS source -- false for 'not inserted'
172 , c.id
173 , hyperdata ->> 'uniqId' as doi
174 FROM input_rows
175 JOIN nodes c USING (hyperdata); -- columns of unique index
176 |]
177
178 ------------------------------------------------------------------------
179 -- * Main Types used
180
181 -- ** Return Types
182
183 -- | When documents are inserted
184 -- ReturnType after insertion
185 data ReturnId = ReturnId { reInserted :: Bool -- if the document is inserted (True: is new, False: is not new)
186 , reId :: NodeId -- always return the id of the document (even new or not new)
187 -- this is the uniq id in the database
188 , reUniqId :: Text -- Hash Id with concatenation of sha parameters
189 } deriving (Show, Generic)
190
191 instance FromRow ReturnId where
192 fromRow = ReturnId <$> field <*> field <*> field
193
194 ---------------------------------------------------------------------------
195 -- * Uniqueness of document definition
196
197 class AddUniqId a
198 where
199 addUniqId :: a -> a
200
201 instance AddUniqId HyperdataDocument
202 where
203 addUniqId = addUniqIdsDoc
204 where
205 addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
206 addUniqIdsDoc doc = set hyperdataDocument_uniqIdBdd (Just shaBdd)
207 $ set hyperdataDocument_uniqId (Just shaUni) doc
208 where
209 shaUni = sha $ DT.concat $ map ($ doc) shaParametersDoc
210 shaBdd = sha $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hyperdataDocument_bdd d))] <> shaParametersDoc)
211
212 shaParametersDoc :: [(HyperdataDocument -> Text)]
213 shaParametersDoc = [ \d -> maybeText (_hyperdataDocument_title d)
214 , \d -> maybeText (_hyperdataDocument_abstract d)
215 , \d -> maybeText (_hyperdataDocument_source d)
216 , \d -> maybeText (_hyperdataDocument_publication_date d)
217 ]
218
219 ---------------------------------------------------------------------------
220 -- * Uniqueness of document definition
221 -- TODO factorize with above (use the function below for tests)
222
223 instance AddUniqId HyperdataContact
224 where
225 addUniqId = addUniqIdsContact
226
227 addUniqIdsContact :: HyperdataContact -> HyperdataContact
228 addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
229 $ set (hc_uniqId ) (Just shaUni) hc
230 where
231 shaUni = uniqId $ DT.concat $ map ($ hc) shaParametersContact
232 shaBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
233
234 uniqId :: Text -> Text
235 uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
236
237 -- | TODO add more shaparameters
238 shaParametersContact :: [(HyperdataContact -> Text)]
239 shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName) d
240 , \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
241 , \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
242 ]
243
244
245
246 maybeText :: Maybe Text -> Text
247 maybeText = maybe (DT.pack "") identity
248
249 ---------------------------------------------------------------------------