]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
[hyperdata] refactor code to add hyperdata graph metrics
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16
17 module Gargantext.Database.Action.Search where
18
19 import Control.Arrow (returnA)
20 import Control.Lens ((^.))
21 import Data.Aeson
22 import Data.List (intersperse, take, drop)
23 import Data.Map.Strict hiding (map, drop, take)
24 import Data.Maybe
25 import Data.String (IsString(..))
26 import Data.Text (Text, words, unpack, intercalate)
27 import Data.Time (UTCTime)
28 import Database.PostgreSQL.Simple (Query)
29 import Database.PostgreSQL.Simple.ToField
30 import Opaleye hiding (Query, Order)
31 import qualified Opaleye as O hiding (Order)
32
33 import Gargantext.Core.Types
34 import Gargantext.Database.Admin.Config (nodeTypeId)
35 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
36 import Gargantext.Database.Admin.Types.Node (NodeType(..))
37 import Gargantext.Database.Query.Facet
38 import Gargantext.Database.Query.Join (leftJoin6)
39 import Gargantext.Database.Query.Table.Node
40 import Gargantext.Database.Query.Table.NodeNode
41 import Gargantext.Database.Query.Table.NodeNodeNgrams
42 import Gargantext.Database.Query.Table.Ngrams
43 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
44 import Gargantext.Database.Schema.Node
45 import Gargantext.Prelude
46 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
47
48 ------------------------------------------------------------------------
49 searchInDatabase :: ParentId
50 -> Text
51 -> Cmd err [(NodeId, HyperdataDocument)]
52 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
53 where
54 -- | Global search query where ParentId is Master Node Corpus Id
55 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
56 queryInDatabase _ q = proc () -> do
57 row <- queryNodeSearchTable -< ()
58 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
59 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
60 returnA -< (_ns_id row, _ns_hyperdata row)
61
62 ------------------------------------------------------------------------
63 -- | todo add limit and offset and order
64 searchInCorpus :: CorpusId
65 -> IsTrash
66 -> [Text]
67 -> Maybe Offset
68 -> Maybe Limit
69 -> Maybe OrderBy
70 -> Cmd err [FacetDoc]
71 searchInCorpus cId t q o l order = runOpaQuery
72 $ filterWith o l order
73 $ queryInCorpus cId t
74 $ intercalate " | "
75 $ map stemIt q
76
77 searchCountInCorpus :: CorpusId
78 -> IsTrash
79 -> [Text]
80 -> Cmd err Int
81 searchCountInCorpus cId t q = runCountOpaQuery
82 $ queryInCorpus cId t
83 $ intercalate " | "
84 $ map stemIt q
85
86 queryInCorpus :: CorpusId
87 -> IsTrash
88 -> Text
89 -> O.Query FacetDocRead
90 queryInCorpus cId t q = proc () -> do
91 (n, nn) <- joinInCorpus -< ()
92 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
93 restrict -< if t
94 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
95 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
96 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
97 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
98 returnA -< FacetDoc (n^.ns_id )
99 (n^.ns_date )
100 (n^.ns_name )
101 (n^.ns_hyperdata)
102 (nn^.nn_category)
103 (nn^.nn_score )
104
105 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
106 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
107 where
108 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
109 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
110
111 ------------------------------------------------------------------------
112 type AuthorName = Text
113
114 -- | TODO Optim: Offset and Limit in the Query
115 -- TODO-SECURITY check
116 searchInCorpusWithContacts
117 :: CorpusId
118 -> ListId
119 -> [Text]
120 -> Maybe Offset
121 -> Maybe Limit
122 -> Maybe OrderBy
123 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
124 searchInCorpusWithContacts cId lId q o l order =
125 take (maybe 10 identity l)
126 <$> drop (maybe 0 identity o)
127 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
128 <$> toList <$> fromListWith (<>)
129 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
130 , catMaybes [Pair <$> p1 <*> p2]
131 )
132 )
133 <$> searchInCorpusWithContacts' cId lId q o l order
134
135 -- TODO-SECURITY check
136 searchInCorpusWithContacts'
137 :: CorpusId
138 -> ListId
139 -> [Text]
140 -> Maybe Offset
141 -> Maybe Limit
142 -> Maybe OrderBy
143 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
144 searchInCorpusWithContacts' cId lId q o l order =
145 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
146 $ intercalate " | "
147 $ map stemIt q
148
149
150 queryInCorpusWithContacts
151 :: CorpusId
152 -> ListId
153 -> Maybe Offset
154 -> Maybe Limit
155 -> Maybe OrderBy
156 -> Text
157 -> O.Query FacetPairedRead
158 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
159 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
160 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
161 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
162 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
163 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
164 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
165 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
166 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
167 returnA -< FacetPaired (n^.ns_id)
168 (n^.ns_date)
169 (n^.ns_hyperdata)
170 (pgInt4 0)
171 (contacts^.node_id, ngrams'^.ngrams_terms)
172
173 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
174 , ( NodeNodeReadNull
175 , ( NodeNodeNgramsReadNull
176 , ( NgramsReadNull
177 , ( NodeNodeNgramsReadNull
178 , NodeReadNull
179 )
180 )
181 )
182 )
183 )
184 joinInCorpusWithContacts =
185 leftJoin6
186 queryNodeTable
187 queryNodeNodeNgramsTable
188 queryNgramsTable
189 queryNodeNodeNgramsTable
190 queryNodeNodeTable
191 queryNodeSearchTable
192 cond12
193 cond23
194 cond34
195 cond45
196 cond56
197 where
198 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
199 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
200
201 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
202 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
203
204 cond34 :: ( NodeNodeNgramsRead
205 , ( NgramsRead
206 , ( NodeNodeNgramsReadNull
207 , NodeReadNull
208 )
209 )
210 ) -> Column PGBool
211 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
212
213 cond45 :: ( NodeNodeRead
214 , ( NodeNodeNgramsRead
215 , ( NgramsReadNull
216 , ( NodeNodeNgramsReadNull
217 , NodeReadNull
218 )
219 )
220 )
221 ) -> Column PGBool
222 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
223
224 cond56 :: ( NodeSearchRead
225 , ( NodeNodeRead
226 , ( NodeNodeNgramsReadNull
227 , ( NgramsReadNull
228 , ( NodeNodeNgramsReadNull
229 , NodeReadNull
230 )
231 )
232 )
233 )
234 ) -> Column PGBool
235 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
236
237
238 newtype TSQuery = UnsafeTSQuery [Text]
239
240 -- | TODO [""] -> panic "error"
241 toTSQuery :: [Text] -> TSQuery
242 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
243
244
245 instance IsString TSQuery
246 where
247 fromString = UnsafeTSQuery . words . cs
248
249
250 instance ToField TSQuery
251 where
252 toField (UnsafeTSQuery xs)
253 = Many $ intersperse (Plain " && ")
254 $ map (\q -> Many [ Plain "plainto_tsquery("
255 , Escape (cs q)
256 , Plain ")"
257 ]
258 ) xs
259
260 data Order = Asc | Desc
261
262 instance ToField Order
263 where
264 toField Asc = Plain "ASC"
265 toField Desc = Plain "DESC"
266
267 -- TODO
268 -- FIX fav
269 -- ADD ngrams count
270 -- TESTS
271 textSearchQuery :: Query
272 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
273 \ , n.hyperdata->'title' \
274 \ , n.hyperdata->'source' \
275 \ , n.hyperdata->'authors' \
276 \ , COALESCE(nn.score,null) \
277 \ FROM nodes n \
278 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
279 \ WHERE \
280 \ n.search @@ (?::tsquery) \
281 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
282 \ AND n.typename = ? \
283 \ ORDER BY n.hyperdata -> 'publication_date' ? \
284 \ offset ? limit ?;"
285
286 -- | Text Search Function for Master Corpus
287 -- TODO : text search for user corpus
288 -- Example:
289 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
290 -- textSearchTest pId q = textSearch q pId 5 0 Asc
291 textSearch :: TSQuery -> ParentId
292 -> Limit -> Offset -> Order
293 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
294 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
295 where
296 typeId = nodeTypeId NodeDocument
297
298