1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
16 module Gargantext.Database.TextSearch where
18 import Data.Aeson
19 import Data.Map.Strict hiding (map, drop, take)
20 import Data.Maybe
21 import Control.Lens ((^.))
22 import Data.List (intersperse, take, drop)
23 import Data.String (IsString(..))
24 import Data.Text (Text, words, unpack, intercalate)
25 import Data.Time (UTCTime)
26 import Database.PostgreSQL.Simple (Query)
27 import Database.PostgreSQL.Simple.ToField
28 import Gargantext.Database.Config (nodeTypeId)
29 import Gargantext.Database.Types.Node (NodeType(..))
30 import Gargantext.Prelude
31 --import Gargantext.Database.Node.Contact
32 import Gargantext.Database.Facet
33 import Gargantext.Database.Schema.Node
34 import Gargantext.Database.Schema.Ngrams
35 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
36 import Gargantext.Database.Schema.NodeNodeNgrams
37 import Gargantext.Database.Queries.Join (leftJoin6)
38 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
39 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
40 import Gargantext.Core.Types
41 import Control.Arrow (returnA)
42 import qualified Opaleye as O hiding (Order)
43 import Opaleye hiding (Query, Order)
46 ------------------------------------------------------------------------
-- | Full-text search over the whole node search table.
--
-- Keeps the rows whose @search@ column matches the given text (turned into a
-- tsquery with 'pgTSQuery') and whose type is 'NodeDocument', and returns
-- each node id paired with its hyperdata.
--
-- NOTE(review): the 'ParentId' argument is accepted but ignored by the query
-- below; confirm whether results should be restricted to it.
searchInDatabase :: ParentId
                 -> Text
                 -> Cmd err [(NodeId, HyperdataDocument)]
searchInDatabase parentId terms = runOpaQuery $ queryInDatabase parentId terms
  where
    -- Global search query where ParentId is Master Node Corpus Id
    queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
    queryInDatabase _ needle = proc () -> do
      hit <- queryNodeSearchTable -< ()
      restrict -< _ns_search hit @@ pgTSQuery (unpack needle)
      restrict -< _ns_typename hit .== pgInt4 (nodeTypeId NodeDocument)
      returnA  -< (_ns_id hit, _ns_hyperdata hit)
60 ------------------------------------------------------------------------
-- | Search the documents of a corpus.
--
-- Every term is stemmed with 'stemIt' and the stems are joined with @" | "@
-- to form the text handed to 'queryInCorpus'. The optional offset, limit and
-- ordering are applied to that query through 'filterWith' before it is run.
searchInCorpus :: CorpusId
               -> IsTrash
               -> [Text]
               -> Maybe Offset
               -> Maybe Limit
               -> Maybe OrderBy
               -> Cmd err [FacetDoc]
searchInCorpus cId t q o l order =
  runOpaQuery (filterWith o l order (queryInCorpus cId t stemmedQuery))
  where
    -- Stemmed terms joined with " | ".
    stemmedQuery :: Text
    stemmedQuery = intercalate " | " (map stemIt q)
-- | Count the documents of a corpus matching the given terms.
--
-- Builds the same query text as a corpus search (stems from 'stemIt' joined
-- with @" | "@), passes it to 'queryInCorpus' and runs the result with
-- 'runCountOpaQuery'.
searchCountInCorpus :: CorpusId
                    -> IsTrash
                    -> [Text]
                    -> Cmd err Int
searchCountInCorpus cId t q =
  runCountOpaQuery (queryInCorpus cId t stemmedQuery)
  where
    -- Stemmed terms joined with " | ".
    stemmedQuery :: Text
    stemmedQuery = intercalate " | " (map stemIt q)
-- | Opaleye query selecting the documents of a corpus that match a
-- full-text query.
--
-- Rows come from 'joinInCorpus' and are kept when:
--
--   * the link's @node1_id@ is the given corpus id;
--   * the link's category is @0@ when the 'IsTrash' flag is set, and at
--     least @1@ otherwise;
--   * the @search@ column matches the tsquery built from the given text;
--   * the node type is 'NodeDocument'.
queryInCorpus :: CorpusId
              -> IsTrash
              -> Text
              -> O.Query FacetDocRead
queryInCorpus cId t q = proc () -> do
  (doc, link) <- joinInCorpus -< ()
  restrict -< (link ^. nn_node1_id) .== toNullable (pgNodeId cId)
  restrict -< if t
                then (link ^. nn_category) .== toNullable (pgInt4 0)
                else (link ^. nn_category) .>= toNullable (pgInt4 1)
  restrict -< (doc ^. ns_search) @@ pgTSQuery (unpack q)
  restrict -< (doc ^. ns_typename) .== pgInt4 (nodeTypeId NodeDocument)
  returnA  -< FacetDoc (doc ^. ns_id)
                       (doc ^. ns_date)
                       (doc ^. ns_name)
                       (doc ^. ns_hyperdata)
                       (link ^. nn_category)
                       (link ^. nn_score)
-- | Left join of the node search table with the node-node table, pairing a
-- search row with the links whose @node2_id@ equals the search row's id.
joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable onDocument
  where
    onDocument :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
    onDocument (doc, link) = link ^. nn_node2_id .== _ns_id doc
109 ------------------------------------------------------------------------
-- | Plain alias for 'Text'; presumably an author's name (not used in the
-- code visible here).
type AuthorName = Text
-- | Search the documents of a corpus and attach their paired contacts.
--
-- The flat rows returned by 'searchInCorpusWithContacts'' (one row per
-- document/contact pair) are grouped by document: rows sharing the same
-- @(id, date, hyperdata, score)@ key have their pairs merged with '<>', and a
-- row whose contact id or term is missing contributes no pair.
--
-- Paging is done in memory after grouping: the offset defaults to @0@ and the
-- limit to @10@ when not supplied. The arguments are also forwarded
-- unchanged to 'searchInCorpusWithContacts''.
--
-- TODO Optim: Offset and Limit in the Query
-- TODO-SECURITY check
searchInCorpusWithContacts
  :: CorpusId
  -> ListId
  -> [Text]
  -> Maybe Offset
  -> Maybe Limit
  -> Maybe OrderBy
  -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
searchInCorpusWithContacts cId lId q o l order =
  ( take (fromMaybe 10 l)
  . drop (fromMaybe 0 o)
  . map toFacet
  . toList
  . fromListWith (<>)
  . map toKeyed
  ) <$> searchInCorpusWithContacts' cId lId q o l order
  where
    -- Split a flat row into its grouping key and a list of zero or one pair.
    toKeyed (FacetPaired i u h s (p1, p2)) =
      ((i, u, h, s), catMaybes [Pair <$> p1 <*> p2])

    -- Rebuild a facet from a grouping key and its merged pairs.
    toFacet ((i, u, h, s), ps) = FacetPaired i u h s ps
-- | Run 'queryInCorpusWithContacts' for the given terms and return its flat
-- rows, each carrying an optional contact id and an optional term.
--
-- The terms are stemmed with 'stemIt' and joined with @" | "@ to form the
-- query text.
--
-- TODO-SECURITY check
searchInCorpusWithContacts'
  :: CorpusId
  -> ListId
  -> [Text]
  -> Maybe Offset
  -> Maybe Limit
  -> Maybe OrderBy
  -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
searchInCorpusWithContacts' cId lId q o l order =
  runOpaQuery (queryInCorpusWithContacts cId lId o l order stemmedQuery)
  where
    -- Stemmed terms joined with " | ".
    stemmedQuery :: Text
    stemmedQuery = intercalate " | " (map stemIt q)
-- | Opaleye query returning, for every document of a corpus that matches the
-- query text, one row per joined contact/ngram produced by
-- 'joinInCorpusWithContacts'.
--
-- Only the corpus id and the query text are used: the list id, offset, limit
-- and ordering arguments are currently ignored. The score column is the
-- constant @0@.
queryInCorpusWithContacts
  :: CorpusId
  -> ListId
  -> Maybe Offset
  -> Maybe Limit
  -> Maybe OrderBy
  -> Text
  -> O.Query FacetPairedRead
queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
  (doc, (link, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
  restrict -< (doc ^. ns_search) @@ pgTSQuery (unpack q)
  restrict -< (doc ^. ns_typename) .== pgInt4 (nodeTypeId NodeDocument)
  -- Disabled: restrict the ngrams link to the given list.
  -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
  restrict -< (link ^. nn_node1_id) .== toNullable (pgNodeId cId)
  -- Disabled: restrict to author ngrams and to contact nodes.
  -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
  -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
  -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
  returnA  -< FacetPaired (doc ^. ns_id)
                          (doc ^. ns_date)
                          (doc ^. ns_hyperdata)
                          (pgInt4 0)
                          (contacts ^. node_id, ngrams' ^. ngrams_terms)
-- | Six-way left join built with 'leftJoin6', yielding a search row nested
-- with its node-node link, a node-node-ngrams link, an ngram, a second
-- node-node-ngrams link and a node (all but the search row nullable).
--
-- The five join conditions are listed in the order 'leftJoin6' expects them;
-- each one is documented next to its definition below.
joinInCorpusWithContacts
  :: O.Query
       ( NodeSearchRead
       , ( NodeNodeReadNull
         , ( NodeNodeNgramsReadNull
           , ( NgramsReadNull
             , ( NodeNodeNgramsReadNull
               , NodeReadNull
               )
             )
           )
         )
       )
joinInCorpusWithContacts =
  leftJoin6 queryNodeTable
            queryNodeNodeNgramsTable
            queryNgramsTable
            queryNodeNodeNgramsTable
            queryNodeNodeTable
            queryNodeSearchTable
            onNode
            onNgramsOfNodeLink
            onNgramsLinkOfNgram
            onLinkOfNgramsLink
            onSearchRowOfLink
  where
    -- The node's id equals the ngrams link's node1_id.
    onNode :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
    onNode (ngramsLink, node) = node ^. node_id .== ngramsLink ^. nnng_node1_id

    -- The ngrams link's ngrams_id equals the ngram's id.
    onNgramsOfNodeLink :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
    onNgramsOfNodeLink (ngram, (ngramsLink, _)) =
      ngramsLink ^. nnng_ngrams_id .== ngram ^. ngrams_id

    -- The ngram's id equals the outer ngrams link's ngrams_id.
    onNgramsLinkOfNgram
      :: ( NodeNodeNgramsRead
         , ( NgramsRead
           , ( NodeNodeNgramsReadNull
             , NodeReadNull
             )
           )
         )
      -> Column PGBool
    onNgramsLinkOfNgram (ngramsLink, (ngram, (_, _))) =
      ngram ^. ngrams_id .== ngramsLink ^. nnng_ngrams_id

    -- The ngrams link's node1_id equals the node-node link's node2_id.
    onLinkOfNgramsLink
      :: ( NodeNodeRead
         , ( NodeNodeNgramsRead
           , ( NgramsReadNull
             , ( NodeNodeNgramsReadNull
               , NodeReadNull
               )
             )
           )
         )
      -> Column PGBool
    onLinkOfNgramsLink (link, (ngramsLink, (_, (_, _)))) =
      ngramsLink ^. nnng_node1_id .== link ^. nn_node2_id

    -- The search row's id equals the node-node link's node2_id.
    onSearchRowOfLink
      :: ( NodeSearchRead
         , ( NodeNodeRead
           , ( NodeNodeNgramsReadNull
             , ( NgramsReadNull
               , ( NodeNodeNgramsReadNull
                 , NodeReadNull
                 )
               )
             )
           )
         )
      -> Column PGBool
    onSearchRowOfLink (doc, (link, (_, (_, (_, _))))) =
      _ns_id doc .== link ^. nn_node2_id
-- | A list of text-search terms wrapped for use as a SQL query parameter.
-- The constructor performs no processing of the terms it is given.
newtype TSQuery = UnsafeTSQuery [Text]
-- | Build a 'TSQuery' by stemming every term with 'stemIt'.
--
-- TODO [""] -> panic "error"
toTSQuery :: [Text] -> TSQuery
toTSQuery = UnsafeTSQuery . map stemIt
-- | A string literal becomes a 'TSQuery' by converting it with 'cs' and
-- splitting it with 'words'; the resulting terms are not stemmed.
instance IsString TSQuery where
  fromString literal = UnsafeTSQuery (words (cs literal))
-- | Render each term as @plainto_tsquery(<escaped term>)@ and join the calls
-- with @" && "@. An empty term list renders as nothing at all.
instance ToField TSQuery where
  toField (UnsafeTSQuery terms) =
    Many (intersperse (Plain " && ") (map plainQuery terms))
    where
      -- One plainto_tsquery call around a single escaped term.
      plainQuery :: Text -> Action
      plainQuery term =
        Many [ Plain "plainto_tsquery("
             , Escape (cs term)
             , Plain ")"
             ]
-- | Sort direction of a query.
data Order
  = Asc   -- ^ ascending
  | Desc  -- ^ descending
-- | Render the direction as the bare SQL keyword @ASC@ or @DESC@ (inserted
-- as-is, without quoting).
instance ToField Order where
  toField direction = Plain $ case direction of
    Asc  -> "ASC"
    Desc -> "DESC"
-- | Raw SQL used by the text search: selects the id, four hyperdata fields
-- (@publication_year@, @title@, @source@, @authors@) and the link score of
-- the nodes matching a tsquery.
--
-- The query has seven @?@ placeholders, in this order: the tsquery, the
-- value compared with @n.parent_id@, the value compared with @nn.node1_id@,
-- the node type id, the sort direction, the offset and the limit.
--
-- TODO
-- FIX fav
-- ADD ngrams count
-- TESTS
textSearchQuery :: Query
textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
                  \ , n.hyperdata->'title' \
                  \ , n.hyperdata->'source' \
                  \ , n.hyperdata->'authors' \
                  \ , COALESCE(nn.score,null) \
                  \ FROM nodes n \
                  \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
                  \ WHERE \
                  \ n.search @@ (?::tsquery) \
                  \ AND (n.parent_id = ? OR nn.node1_id = ?) \
                  \ AND n.typename = ? \
                  \ ORDER BY n.hyperdata -> 'publication_date' ? \
                  \ offset ? limit ?;"
-- | Text Search Function for Master Corpus
--
-- Runs 'textSearchQuery' with, in placeholder order: the query, the parent
-- id (used twice), the type id of 'NodeDocument', the sort direction, the
-- offset and the limit.
--
-- TODO : text search for user corpus
--
-- Example:
--
-- > textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
-- > textSearchTest pId q = textSearch q pId 5 0 Asc
textSearch :: TSQuery -> ParentId
           -> Limit -> Offset -> Order
           -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
textSearch q p l o ord =
  runPGSQuery textSearchQuery (q, p, p, nodeTypeId NodeDocument, ord, o, l)