]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
[SECURITY|COLLAB] can share Node Team only
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Aeson
18 import Data.List (intersperse, take, drop)
19 import Data.Map.Strict hiding (map, drop, take)
20 import Data.Maybe
21 import Data.String (IsString(..))
22 import Data.Text (Text, words, unpack, intercalate)
23 import Data.Time (UTCTime)
24 import Database.PostgreSQL.Simple (Query)
25 import Database.PostgreSQL.Simple.ToField
26 import Gargantext.Core.Types
27 import Gargantext.Database.Query.Facet
28 import Gargantext.Database.Query.Join (leftJoin6)
29 import Gargantext.Database.Query.Table.Node
30 import Gargantext.Database.Query.Table.NodeNode
31 import Gargantext.Database.Query.Table.NodeNodeNgrams
32 import Gargantext.Database.Query.Table.Ngrams
33 import Gargantext.Database.Admin.Config (nodeTypeId)
34 import Gargantext.Database.Admin.Types.Node (NodeType(..))
35 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
36 import Gargantext.Database.Schema.Node
37 import Gargantext.Prelude
38 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
39 import Opaleye hiding (Query, Order)
40 import qualified Opaleye as O hiding (Order)
41
42 ------------------------------------------------------------------------
43 searchInDatabase :: ParentId
44 -> Text
45 -> Cmd err [(NodeId, HyperdataDocument)]
46 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
47 where
48 -- | Global search query where ParentId is Master Node Corpus Id
49 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
50 queryInDatabase _ q = proc () -> do
51 row <- queryNodeSearchTable -< ()
52 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
53 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
54 returnA -< (_ns_id row, _ns_hyperdata row)
55
56 ------------------------------------------------------------------------
57 -- | todo add limit and offset and order
58 searchInCorpus :: CorpusId
59 -> IsTrash
60 -> [Text]
61 -> Maybe Offset
62 -> Maybe Limit
63 -> Maybe OrderBy
64 -> Cmd err [FacetDoc]
65 searchInCorpus cId t q o l order = runOpaQuery
66 $ filterWith o l order
67 $ queryInCorpus cId t
68 $ intercalate " | "
69 $ map stemIt q
70
71 searchCountInCorpus :: CorpusId
72 -> IsTrash
73 -> [Text]
74 -> Cmd err Int
75 searchCountInCorpus cId t q = runCountOpaQuery
76 $ queryInCorpus cId t
77 $ intercalate " | "
78 $ map stemIt q
79
80 queryInCorpus :: CorpusId
81 -> IsTrash
82 -> Text
83 -> O.Query FacetDocRead
84 queryInCorpus cId t q = proc () -> do
85 (n, nn) <- joinInCorpus -< ()
86 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
87 restrict -< if t
88 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
89 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
90 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
91 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
92 returnA -< FacetDoc (n^.ns_id )
93 (n^.ns_date )
94 (n^.ns_name )
95 (n^.ns_hyperdata)
96 (nn^.nn_category)
97 (nn^.nn_score )
98
99 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
100 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
101 where
102 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
103 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
104
105 ------------------------------------------------------------------------
106 type AuthorName = Text
107
108 -- | TODO Optim: Offset and Limit in the Query
109 -- TODO-SECURITY check
110 searchInCorpusWithContacts
111 :: CorpusId
112 -> ListId
113 -> [Text]
114 -> Maybe Offset
115 -> Maybe Limit
116 -> Maybe OrderBy
117 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
118 searchInCorpusWithContacts cId lId q o l order =
119 take (maybe 10 identity l)
120 <$> drop (maybe 0 identity o)
121 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
122 <$> toList <$> fromListWith (<>)
123 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
124 , catMaybes [Pair <$> p1 <*> p2]
125 )
126 )
127 <$> searchInCorpusWithContacts' cId lId q o l order
128
129 -- TODO-SECURITY check
130 searchInCorpusWithContacts'
131 :: CorpusId
132 -> ListId
133 -> [Text]
134 -> Maybe Offset
135 -> Maybe Limit
136 -> Maybe OrderBy
137 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
138 searchInCorpusWithContacts' cId lId q o l order =
139 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
140 $ intercalate " | "
141 $ map stemIt q
142
143
144 queryInCorpusWithContacts
145 :: CorpusId
146 -> ListId
147 -> Maybe Offset
148 -> Maybe Limit
149 -> Maybe OrderBy
150 -> Text
151 -> O.Query FacetPairedRead
152 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
153 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
154 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
155 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
156 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
157 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
158 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
159 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
160 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
161 returnA -< FacetPaired (n^.ns_id)
162 (n^.ns_date)
163 (n^.ns_hyperdata)
164 (pgInt4 0)
165 (contacts^.node_id, ngrams'^.ngrams_terms)
166
167 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
168 , ( NodeNodeReadNull
169 , ( NodeNodeNgramsReadNull
170 , ( NgramsReadNull
171 , ( NodeNodeNgramsReadNull
172 , NodeReadNull
173 )
174 )
175 )
176 )
177 )
178 joinInCorpusWithContacts =
179 leftJoin6
180 queryNodeTable
181 queryNodeNodeNgramsTable
182 queryNgramsTable
183 queryNodeNodeNgramsTable
184 queryNodeNodeTable
185 queryNodeSearchTable
186 cond12
187 cond23
188 cond34
189 cond45
190 cond56
191 where
192 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
193 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
194
195 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
196 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
197
198 cond34 :: ( NodeNodeNgramsRead
199 , ( NgramsRead
200 , ( NodeNodeNgramsReadNull
201 , NodeReadNull
202 )
203 )
204 ) -> Column PGBool
205 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
206
207 cond45 :: ( NodeNodeRead
208 , ( NodeNodeNgramsRead
209 , ( NgramsReadNull
210 , ( NodeNodeNgramsReadNull
211 , NodeReadNull
212 )
213 )
214 )
215 ) -> Column PGBool
216 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
217
218 cond56 :: ( NodeSearchRead
219 , ( NodeNodeRead
220 , ( NodeNodeNgramsReadNull
221 , ( NgramsReadNull
222 , ( NodeNodeNgramsReadNull
223 , NodeReadNull
224 )
225 )
226 )
227 )
228 ) -> Column PGBool
229 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
230
231
232 newtype TSQuery = UnsafeTSQuery [Text]
233
234 -- | TODO [""] -> panic "error"
235 toTSQuery :: [Text] -> TSQuery
236 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
237
238
239 instance IsString TSQuery
240 where
241 fromString = UnsafeTSQuery . words . cs
242
243
244 instance ToField TSQuery
245 where
246 toField (UnsafeTSQuery xs)
247 = Many $ intersperse (Plain " && ")
248 $ map (\q -> Many [ Plain "plainto_tsquery("
249 , Escape (cs q)
250 , Plain ")"
251 ]
252 ) xs
253
254 data Order = Asc | Desc
255
256 instance ToField Order
257 where
258 toField Asc = Plain "ASC"
259 toField Desc = Plain "DESC"
260
261 -- TODO
262 -- FIX fav
263 -- ADD ngrams count
264 -- TESTS
265 textSearchQuery :: Query
266 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
267 \ , n.hyperdata->'title' \
268 \ , n.hyperdata->'source' \
269 \ , n.hyperdata->'authors' \
270 \ , COALESCE(nn.score,null) \
271 \ FROM nodes n \
272 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
273 \ WHERE \
274 \ n.search @@ (?::tsquery) \
275 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
276 \ AND n.typename = ? \
277 \ ORDER BY n.hyperdata -> 'publication_date' ? \
278 \ offset ? limit ?;"
279
280 -- | Text Search Function for Master Corpus
281 -- TODO : text search for user corpus
282 -- Example:
283 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
284 -- textSearchTest pId q = textSearch q pId 5 0 Asc
285 textSearch :: TSQuery -> ParentId
286 -> Limit -> Offset -> Order
287 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
288 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
289 where
290 typeId = nodeTypeId NodeDocument
291
292