]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
Remove inMVar and inMVarIO
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
16
17 module Gargantext.Database.TextSearch where
18
19 import Data.Aeson
20 import Data.Map.Strict hiding (map, drop, take)
21 import Data.Maybe
22 import Control.Lens ((^.))
23 import Data.List (intersperse, take, drop)
24 import Data.String (IsString(..))
25 import Data.Text (Text, words, unpack, intercalate)
26 import Data.Time (UTCTime)
27 import Database.PostgreSQL.Simple (Query)
28 import Database.PostgreSQL.Simple.ToField
29 import Gargantext.Database.Config (nodeTypeId)
30 import Gargantext.Database.Types.Node (NodeType(..))
31 import Gargantext.Prelude
32 --import Gargantext.Database.Node.Contact
33 import Gargantext.Database.Facet
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Database.Schema.Ngrams
36 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
37 import Gargantext.Database.Schema.NodeNodeNgrams
38 import Gargantext.Database.Queries.Join (leftJoin6)
39 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
40 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
41 import Gargantext.Core.Types
42 import Control.Arrow (returnA)
43 import qualified Opaleye as O hiding (Order)
44 import Opaleye hiding (Query, Order)
45
46
47 ------------------------------------------------------------------------
48 searchInDatabase :: ParentId
49 -> Text
50 -> Cmd err [(NodeId, HyperdataDocument)]
51 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
52 where
53 -- | Global search query where ParentId is Master Node Corpus Id
54 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
55 queryInDatabase _ q = proc () -> do
56 row <- queryNodeSearchTable -< ()
57 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
58 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
59 returnA -< (_ns_id row, _ns_hyperdata row)
60
61 ------------------------------------------------------------------------
62 -- | todo add limit and offset and order
63 searchInCorpus :: CorpusId
64 -> IsTrash
65 -> [Text]
66 -> Maybe Offset
67 -> Maybe Limit
68 -> Maybe OrderBy
69 -> Cmd err [FacetDoc]
70 searchInCorpus cId t q o l order = runOpaQuery
71 $ filterWith o l order
72 $ queryInCorpus cId t
73 $ intercalate " | "
74 $ map stemIt q
75
76 searchCountInCorpus :: CorpusId
77 -> IsTrash
78 -> [Text]
79 -> Cmd err Int
80 searchCountInCorpus cId t q = runCountOpaQuery
81 $ queryInCorpus cId t
82 $ intercalate " | "
83 $ map stemIt q
84
85 queryInCorpus :: CorpusId
86 -> IsTrash
87 -> Text
88 -> O.Query FacetDocRead
89 queryInCorpus cId t q = proc () -> do
90 (n, nn) <- joinInCorpus -< ()
91 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
92 restrict -< if t
93 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
94 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
95 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
96 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
97 returnA -< FacetDoc (n^.ns_id )
98 (n^.ns_date )
99 (n^.ns_name )
100 (n^.ns_hyperdata)
101 (nn^.nn_category)
102 (nn^.nn_score )
103
104 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
105 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
106 where
107 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
108 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
109
110 ------------------------------------------------------------------------
111 type AuthorName = Text
112
113 -- | TODO Optim: Offset and Limit in the Query
114 -- TODO-SECURITY check
115 searchInCorpusWithContacts
116 :: CorpusId
117 -> ListId
118 -> [Text]
119 -> Maybe Offset
120 -> Maybe Limit
121 -> Maybe OrderBy
122 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
123 searchInCorpusWithContacts cId lId q o l order =
124 take (maybe 10 identity l)
125 <$> drop (maybe 0 identity o)
126 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
127 <$> toList <$> fromListWith (<>)
128 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
129 , catMaybes [Pair <$> p1 <*> p2]
130 )
131 )
132 <$> searchInCorpusWithContacts' cId lId q o l order
133
134 -- TODO-SECURITY check
135 searchInCorpusWithContacts'
136 :: CorpusId
137 -> ListId
138 -> [Text]
139 -> Maybe Offset
140 -> Maybe Limit
141 -> Maybe OrderBy
142 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
143 searchInCorpusWithContacts' cId lId q o l order =
144 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
145 $ intercalate " | "
146 $ map stemIt q
147
148
149 queryInCorpusWithContacts
150 :: CorpusId
151 -> ListId
152 -> Maybe Offset
153 -> Maybe Limit
154 -> Maybe OrderBy
155 -> Text
156 -> O.Query FacetPairedRead
157 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
158 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
159 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
160 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
161 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
162 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
163 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
164 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
165 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
166 returnA -< FacetPaired (n^.ns_id)
167 (n^.ns_date)
168 (n^.ns_hyperdata)
169 (pgInt4 0)
170 (contacts^.node_id, ngrams'^.ngrams_terms)
171
172 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
173 , ( NodeNodeReadNull
174 , ( NodeNodeNgramsReadNull
175 , ( NgramsReadNull
176 , ( NodeNodeNgramsReadNull
177 , NodeReadNull
178 )
179 )
180 )
181 )
182 )
183 joinInCorpusWithContacts =
184 leftJoin6
185 queryNodeTable
186 queryNodeNodeNgramsTable
187 queryNgramsTable
188 queryNodeNodeNgramsTable
189 queryNodeNodeTable
190 queryNodeSearchTable
191 cond12
192 cond23
193 cond34
194 cond45
195 cond56
196 where
197 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
198 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
199
200 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
201 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
202
203 cond34 :: ( NodeNodeNgramsRead
204 , ( NgramsRead
205 , ( NodeNodeNgramsReadNull
206 , NodeReadNull
207 )
208 )
209 ) -> Column PGBool
210 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
211
212 cond45 :: ( NodeNodeRead
213 , ( NodeNodeNgramsRead
214 , ( NgramsReadNull
215 , ( NodeNodeNgramsReadNull
216 , NodeReadNull
217 )
218 )
219 )
220 ) -> Column PGBool
221 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
222
223 cond56 :: ( NodeSearchRead
224 , ( NodeNodeRead
225 , ( NodeNodeNgramsReadNull
226 , ( NgramsReadNull
227 , ( NodeNodeNgramsReadNull
228 , NodeReadNull
229 )
230 )
231 )
232 )
233 ) -> Column PGBool
234 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
235
236
237 newtype TSQuery = UnsafeTSQuery [Text]
238
239 -- | TODO [""] -> panic "error"
240 toTSQuery :: [Text] -> TSQuery
241 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
242
243
244 instance IsString TSQuery
245 where
246 fromString = UnsafeTSQuery . words . cs
247
248
249 instance ToField TSQuery
250 where
251 toField (UnsafeTSQuery xs)
252 = Many $ intersperse (Plain " && ")
253 $ map (\q -> Many [ Plain "plainto_tsquery("
254 , Escape (cs q)
255 , Plain ")"
256 ]
257 ) xs
258
259 data Order = Asc | Desc
260
261 instance ToField Order
262 where
263 toField Asc = Plain "ASC"
264 toField Desc = Plain "DESC"
265
266 -- TODO
267 -- FIX fav
268 -- ADD ngrams count
269 -- TESTS
270 textSearchQuery :: Query
271 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
272 \ , n.hyperdata->'title' \
273 \ , n.hyperdata->'source' \
274 \ , n.hyperdata->'authors' \
275 \ , COALESCE(nn.score,null) \
276 \ FROM nodes n \
277 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
278 \ WHERE \
279 \ n.search @@ (?::tsquery) \
280 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
281 \ AND n.typename = ? \
282 \ ORDER BY n.hyperdata -> 'publication_date' ? \
283 \ offset ? limit ?;"
284
285 -- | Text Search Function for Master Corpus
286 -- TODO : text search for user corpus
287 -- Example:
288 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
289 -- textSearchTest pId q = textSearch q pId 5 0 Asc
290 textSearch :: TSQuery -> ParentId
291 -> Limit -> Offset -> Order
292 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
293 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
294 where
295 typeId = nodeTypeId NodeDocument
296
297