]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
[Community pairing] alignment step
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Aeson
18 import Data.List (intersperse, take, drop)
19 import Data.Map.Strict hiding (map, drop, take)
20 import Data.Maybe
21 import Data.String (IsString(..))
22 import Data.Text (Text, words, unpack, intercalate)
23 import Data.Time (UTCTime)
24 import Database.PostgreSQL.Simple (Query)
25 import Database.PostgreSQL.Simple.ToField
26 import Opaleye hiding (Query, Order)
27 import qualified Opaleye as O hiding (Order)
28
29 import Gargantext.Core.Types
30 import Gargantext.Database.Admin.Config (nodeTypeId)
31 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
32 import Gargantext.Database.Admin.Types.Node (NodeType(..))
33 import Gargantext.Database.Query.Facet
34 import Gargantext.Database.Query.Join (leftJoin6)
35 import Gargantext.Database.Query.Table.Node
36 import Gargantext.Database.Query.Table.NodeNode
37 import Gargantext.Database.Query.Table.NodeNodeNgrams
38 import Gargantext.Database.Query.Table.Ngrams
39 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
40 import Gargantext.Database.Schema.Node
41 import Gargantext.Prelude
42 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
43
44 ------------------------------------------------------------------------
45 searchInDatabase :: ParentId
46 -> Text
47 -> Cmd err [(NodeId, HyperdataDocument)]
48 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
49 where
50 -- | Global search query where ParentId is Master Node Corpus Id
51 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
52 queryInDatabase _ q = proc () -> do
53 row <- queryNodeSearchTable -< ()
54 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
55 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
56 returnA -< (_ns_id row, _ns_hyperdata row)
57
58 ------------------------------------------------------------------------
59 -- | todo add limit and offset and order
60 searchInCorpus :: CorpusId
61 -> IsTrash
62 -> [Text]
63 -> Maybe Offset
64 -> Maybe Limit
65 -> Maybe OrderBy
66 -> Cmd err [FacetDoc]
67 searchInCorpus cId t q o l order = runOpaQuery
68 $ filterWith o l order
69 $ queryInCorpus cId t
70 $ intercalate " | "
71 $ map stemIt q
72
73 searchCountInCorpus :: CorpusId
74 -> IsTrash
75 -> [Text]
76 -> Cmd err Int
77 searchCountInCorpus cId t q = runCountOpaQuery
78 $ queryInCorpus cId t
79 $ intercalate " | "
80 $ map stemIt q
81
82 queryInCorpus :: CorpusId
83 -> IsTrash
84 -> Text
85 -> O.Query FacetDocRead
86 queryInCorpus cId t q = proc () -> do
87 (n, nn) <- joinInCorpus -< ()
88 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
89 restrict -< if t
90 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
91 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
92 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
93 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
94 returnA -< FacetDoc (n^.ns_id )
95 (n^.ns_date )
96 (n^.ns_name )
97 (n^.ns_hyperdata)
98 (nn^.nn_category)
99 (nn^.nn_score )
100
101 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
102 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
103 where
104 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
105 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
106
107 ------------------------------------------------------------------------
108 type AuthorName = Text
109
110 -- | TODO Optim: Offset and Limit in the Query
111 -- TODO-SECURITY check
112 searchInCorpusWithContacts
113 :: CorpusId
114 -> ListId
115 -> [Text]
116 -> Maybe Offset
117 -> Maybe Limit
118 -> Maybe OrderBy
119 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
120 searchInCorpusWithContacts cId lId q o l order =
121 take (maybe 10 identity l)
122 <$> drop (maybe 0 identity o)
123 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
124 <$> toList <$> fromListWith (<>)
125 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
126 , catMaybes [Pair <$> p1 <*> p2]
127 )
128 )
129 <$> searchInCorpusWithContacts' cId lId q o l order
130
131 -- TODO-SECURITY check
132 searchInCorpusWithContacts'
133 :: CorpusId
134 -> ListId
135 -> [Text]
136 -> Maybe Offset
137 -> Maybe Limit
138 -> Maybe OrderBy
139 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
140 searchInCorpusWithContacts' cId lId q o l order =
141 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
142 $ intercalate " | "
143 $ map stemIt q
144
145
146 queryInCorpusWithContacts
147 :: CorpusId
148 -> ListId
149 -> Maybe Offset
150 -> Maybe Limit
151 -> Maybe OrderBy
152 -> Text
153 -> O.Query FacetPairedRead
154 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
155 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
156 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
157 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
158 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
159 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
160 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
161 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
162 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
163 returnA -< FacetPaired (n^.ns_id)
164 (n^.ns_date)
165 (n^.ns_hyperdata)
166 (pgInt4 0)
167 (contacts^.node_id, ngrams'^.ngrams_terms)
168
169 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
170 , ( NodeNodeReadNull
171 , ( NodeNodeNgramsReadNull
172 , ( NgramsReadNull
173 , ( NodeNodeNgramsReadNull
174 , NodeReadNull
175 )
176 )
177 )
178 )
179 )
180 joinInCorpusWithContacts =
181 leftJoin6
182 queryNodeTable
183 queryNodeNodeNgramsTable
184 queryNgramsTable
185 queryNodeNodeNgramsTable
186 queryNodeNodeTable
187 queryNodeSearchTable
188 cond12
189 cond23
190 cond34
191 cond45
192 cond56
193 where
194 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
195 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
196
197 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
198 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
199
200 cond34 :: ( NodeNodeNgramsRead
201 , ( NgramsRead
202 , ( NodeNodeNgramsReadNull
203 , NodeReadNull
204 )
205 )
206 ) -> Column PGBool
207 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
208
209 cond45 :: ( NodeNodeRead
210 , ( NodeNodeNgramsRead
211 , ( NgramsReadNull
212 , ( NodeNodeNgramsReadNull
213 , NodeReadNull
214 )
215 )
216 )
217 ) -> Column PGBool
218 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
219
220 cond56 :: ( NodeSearchRead
221 , ( NodeNodeRead
222 , ( NodeNodeNgramsReadNull
223 , ( NgramsReadNull
224 , ( NodeNodeNgramsReadNull
225 , NodeReadNull
226 )
227 )
228 )
229 )
230 ) -> Column PGBool
231 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
232
233
234 newtype TSQuery = UnsafeTSQuery [Text]
235
236 -- | TODO [""] -> panic "error"
237 toTSQuery :: [Text] -> TSQuery
238 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
239
240
241 instance IsString TSQuery
242 where
243 fromString = UnsafeTSQuery . words . cs
244
245
246 instance ToField TSQuery
247 where
248 toField (UnsafeTSQuery xs)
249 = Many $ intersperse (Plain " && ")
250 $ map (\q -> Many [ Plain "plainto_tsquery("
251 , Escape (cs q)
252 , Plain ")"
253 ]
254 ) xs
255
256 data Order = Asc | Desc
257
258 instance ToField Order
259 where
260 toField Asc = Plain "ASC"
261 toField Desc = Plain "DESC"
262
263 -- TODO
264 -- FIX fav
265 -- ADD ngrams count
266 -- TESTS
267 textSearchQuery :: Query
268 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
269 \ , n.hyperdata->'title' \
270 \ , n.hyperdata->'source' \
271 \ , n.hyperdata->'authors' \
272 \ , COALESCE(nn.score,null) \
273 \ FROM nodes n \
274 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
275 \ WHERE \
276 \ n.search @@ (?::tsquery) \
277 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
278 \ AND n.typename = ? \
279 \ ORDER BY n.hyperdata -> 'publication_date' ? \
280 \ offset ? limit ?;"
281
282 -- | Text Search Function for Master Corpus
283 -- TODO : text search for user corpus
284 -- Example:
285 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
286 -- textSearchTest pId q = textSearch q pId 5 0 Asc
287 textSearch :: TSQuery -> ParentId
288 -> Limit -> Offset -> Order
289 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
290 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
291 where
292 typeId = nodeTypeId NodeDocument
293
294