]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
change beta to 1 / beta
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15
16 module Gargantext.Database.TextSearch where
17
18 import Data.Aeson
19 import Data.Map.Strict hiding (map, drop, take)
20 import Data.Maybe
21 import Control.Lens ((^.))
22 import Data.List (intersperse, take, drop)
23 import Data.String (IsString(..))
24 import Data.Text (Text, words, unpack, intercalate)
25 import Data.Time (UTCTime)
26 import Database.PostgreSQL.Simple (Query)
27 import Database.PostgreSQL.Simple.ToField
28 import Gargantext.Database.Config (nodeTypeId)
29 import Gargantext.Database.Types.Node (NodeType(..))
30 import Gargantext.Prelude
31 --import Gargantext.Database.Node.Contact
32 import Gargantext.Database.Facet
33 import Gargantext.Database.Schema.Node
34 import Gargantext.Database.Schema.Ngrams
35 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
36 import Gargantext.Database.Schema.NodeNodeNgrams
37 import Gargantext.Database.Queries.Join (leftJoin6)
38 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
39 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
40 import Gargantext.Core.Types
41 import Control.Arrow (returnA)
42 import qualified Opaleye as O hiding (Order)
43 import Opaleye hiding (Query, Order)
44
45
46 ------------------------------------------------------------------------
47 searchInDatabase :: ParentId
48 -> Text
49 -> Cmd err [(NodeId, HyperdataDocument)]
50 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
51 where
52 -- | Global search query where ParentId is Master Node Corpus Id
53 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
54 queryInDatabase _ q = proc () -> do
55 row <- queryNodeSearchTable -< ()
56 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
57 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
58 returnA -< (_ns_id row, _ns_hyperdata row)
59
60 ------------------------------------------------------------------------
61 -- | todo add limit and offset and order
62 searchInCorpus :: CorpusId
63 -> IsTrash
64 -> [Text]
65 -> Maybe Offset
66 -> Maybe Limit
67 -> Maybe OrderBy
68 -> Cmd err [FacetDoc]
69 searchInCorpus cId t q o l order = runOpaQuery
70 $ filterWith o l order
71 $ queryInCorpus cId t
72 $ intercalate " | "
73 $ map stemIt q
74
75 searchCountInCorpus :: CorpusId
76 -> IsTrash
77 -> [Text]
78 -> Cmd err Int
79 searchCountInCorpus cId t q = runCountOpaQuery
80 $ queryInCorpus cId t
81 $ intercalate " | "
82 $ map stemIt q
83
84 queryInCorpus :: CorpusId
85 -> IsTrash
86 -> Text
87 -> O.Query FacetDocRead
88 queryInCorpus cId t q = proc () -> do
89 (n, nn) <- joinInCorpus -< ()
90 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
91 restrict -< if t
92 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
93 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
94 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
95 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
96 returnA -< FacetDoc (n^.ns_id )
97 (n^.ns_date )
98 (n^.ns_name )
99 (n^.ns_hyperdata)
100 (nn^.nn_category)
101 (nn^.nn_score )
102
103 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
104 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
105 where
106 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
107 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
108
109 ------------------------------------------------------------------------
110 type AuthorName = Text
111
112 -- | TODO Optim: Offset and Limit in the Query
113 -- TODO-SECURITY check
114 searchInCorpusWithContacts
115 :: CorpusId
116 -> ListId
117 -> [Text]
118 -> Maybe Offset
119 -> Maybe Limit
120 -> Maybe OrderBy
121 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
122 searchInCorpusWithContacts cId lId q o l order =
123 take (maybe 10 identity l)
124 <$> drop (maybe 0 identity o)
125 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
126 <$> toList <$> fromListWith (<>)
127 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
128 , catMaybes [Pair <$> p1 <*> p2]
129 )
130 )
131 <$> searchInCorpusWithContacts' cId lId q o l order
132
133 -- TODO-SECURITY check
134 searchInCorpusWithContacts'
135 :: CorpusId
136 -> ListId
137 -> [Text]
138 -> Maybe Offset
139 -> Maybe Limit
140 -> Maybe OrderBy
141 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
142 searchInCorpusWithContacts' cId lId q o l order =
143 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
144 $ intercalate " | "
145 $ map stemIt q
146
147
148 queryInCorpusWithContacts
149 :: CorpusId
150 -> ListId
151 -> Maybe Offset
152 -> Maybe Limit
153 -> Maybe OrderBy
154 -> Text
155 -> O.Query FacetPairedRead
156 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
157 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
158 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
159 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
160 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
161 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
162 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
163 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
164 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
165 returnA -< FacetPaired (n^.ns_id)
166 (n^.ns_date)
167 (n^.ns_hyperdata)
168 (pgInt4 0)
169 (contacts^.node_id, ngrams'^.ngrams_terms)
170
171 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
172 , ( NodeNodeReadNull
173 , ( NodeNodeNgramsReadNull
174 , ( NgramsReadNull
175 , ( NodeNodeNgramsReadNull
176 , NodeReadNull
177 )
178 )
179 )
180 )
181 )
182 joinInCorpusWithContacts =
183 leftJoin6
184 queryNodeTable
185 queryNodeNodeNgramsTable
186 queryNgramsTable
187 queryNodeNodeNgramsTable
188 queryNodeNodeTable
189 queryNodeSearchTable
190 cond12
191 cond23
192 cond34
193 cond45
194 cond56
195 where
196 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
197 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
198
199 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
200 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
201
202 cond34 :: ( NodeNodeNgramsRead
203 , ( NgramsRead
204 , ( NodeNodeNgramsReadNull
205 , NodeReadNull
206 )
207 )
208 ) -> Column PGBool
209 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
210
211 cond45 :: ( NodeNodeRead
212 , ( NodeNodeNgramsRead
213 , ( NgramsReadNull
214 , ( NodeNodeNgramsReadNull
215 , NodeReadNull
216 )
217 )
218 )
219 ) -> Column PGBool
220 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
221
222 cond56 :: ( NodeSearchRead
223 , ( NodeNodeRead
224 , ( NodeNodeNgramsReadNull
225 , ( NgramsReadNull
226 , ( NodeNodeNgramsReadNull
227 , NodeReadNull
228 )
229 )
230 )
231 )
232 ) -> Column PGBool
233 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
234
235
236 newtype TSQuery = UnsafeTSQuery [Text]
237
238 -- | TODO [""] -> panic "error"
239 toTSQuery :: [Text] -> TSQuery
240 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
241
242
243 instance IsString TSQuery
244 where
245 fromString = UnsafeTSQuery . words . cs
246
247
248 instance ToField TSQuery
249 where
250 toField (UnsafeTSQuery xs)
251 = Many $ intersperse (Plain " && ")
252 $ map (\q -> Many [ Plain "plainto_tsquery("
253 , Escape (cs q)
254 , Plain ")"
255 ]
256 ) xs
257
258 data Order = Asc | Desc
259
260 instance ToField Order
261 where
262 toField Asc = Plain "ASC"
263 toField Desc = Plain "DESC"
264
265 -- TODO
266 -- FIX fav
267 -- ADD ngrams count
268 -- TESTS
269 textSearchQuery :: Query
270 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
271 \ , n.hyperdata->'title' \
272 \ , n.hyperdata->'source' \
273 \ , n.hyperdata->'authors' \
274 \ , COALESCE(nn.score,null) \
275 \ FROM nodes n \
276 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
277 \ WHERE \
278 \ n.search @@ (?::tsquery) \
279 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
280 \ AND n.typename = ? \
281 \ ORDER BY n.hyperdata -> 'publication_date' ? \
282 \ offset ? limit ?;"
283
284 -- | Text Search Function for Master Corpus
285 -- TODO : text search for user corpus
286 -- Example:
287 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
288 -- textSearchTest pId q = textSearch q pId 5 0 Asc
289 textSearch :: TSQuery -> ParentId
290 -> Limit -> Offset -> Order
291 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
292 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
293 where
294 typeId = nodeTypeId NodeDocument
295
296