]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
getTableNgrams: display timing information
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15
16 module Gargantext.Database.TextSearch where
17
18 import Data.Aeson
19 import Data.Map.Strict hiding (map, drop, take)
20 import Data.Maybe
21 import Control.Lens ((^.))
22 import Data.List (intersperse, take, drop)
23 import Data.String (IsString(..))
24 import Data.Text (Text, words, unpack, intercalate)
25 import Data.Time (UTCTime)
26 import Database.PostgreSQL.Simple (Query)
27 import Database.PostgreSQL.Simple.ToField
28 import Gargantext.Database.Config (nodeTypeId)
29 import Gargantext.Database.Types.Node (NodeType(..))
30 import Gargantext.Prelude
31 --import Gargantext.Database.Node.Contact
32 import Gargantext.Database.Facet
33 import Gargantext.Database.Schema.Node
34 import Gargantext.Database.Schema.Ngrams
35 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
36 import Gargantext.Database.Schema.NodeNodeNgrams
37 import Gargantext.Database.Queries.Join (leftJoin6)
38 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery)
39 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
40 import Gargantext.Core.Types
41 import Control.Arrow (returnA)
42 import qualified Opaleye as O hiding (Order)
43 import Opaleye hiding (Query, Order)
44
45
46 ------------------------------------------------------------------------
47 searchInDatabase :: ParentId
48 -> Text
49 -> Cmd err [(NodeId, HyperdataDocument)]
50 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
51 where
52 -- | Global search query where ParentId is Master Node Corpus Id
53 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
54 queryInDatabase _ q = proc () -> do
55 row <- queryNodeSearchTable -< ()
56 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
57 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
58 returnA -< (_ns_id row, _ns_hyperdata row)
59
60 ------------------------------------------------------------------------
61 -- | todo add limit and offset and order
62 searchInCorpus :: CorpusId
63 -> IsTrash
64 -> [Text]
65 -> Maybe Offset
66 -> Maybe Limit
67 -> Maybe OrderBy
68 -> Cmd err [FacetDoc]
69 searchInCorpus cId t q o l order = runOpaQuery (filterWith o l order $ queryInCorpus cId t q')
70 where
71 q' = intercalate " | " $ map stemIt q
72
73 queryInCorpus :: CorpusId
74 -> IsTrash
75 -> Text
76 -> O.Query FacetDocRead
77 queryInCorpus cId t q = proc () -> do
78 (n, nn) <- joinInCorpus -< ()
79 restrict -< ( nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
80 restrict -< if t
81 then ( nn^.nn_category) .== (toNullable $ pgInt4 0)
82 else ( nn^.nn_category) .>= (toNullable $ pgInt4 1)
83 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
84 restrict -< (n ^. ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
85 returnA -< FacetDoc (n^.ns_id )
86 (n^.ns_date )
87 (n^.ns_name )
88 (n^.ns_hyperdata)
89 (nn^.nn_category)
90 (nn^.nn_score )
91
92 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
93 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
94 where
95 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
96 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
97
98 ------------------------------------------------------------------------
99 type AuthorName = Text
100
101 -- | TODO Optim: Offset and Limit in the Query
102 -- TODO-SECURITY check
103 searchInCorpusWithContacts
104 :: CorpusId
105 -> ListId
106 -> [Text]
107 -> Maybe Offset
108 -> Maybe Limit
109 -> Maybe OrderBy
110 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
111 searchInCorpusWithContacts cId lId q o l order =
112 take (maybe 10 identity l)
113 <$> drop (maybe 0 identity o)
114 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
115 <$> toList <$> fromListWith (<>)
116 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
117 , catMaybes [Pair <$> p1 <*> p2]
118 )
119 )
120 <$> searchInCorpusWithContacts' cId lId q o l order
121
122 -- TODO-SECURITY check
123 searchInCorpusWithContacts'
124 :: CorpusId
125 -> ListId
126 -> [Text]
127 -> Maybe Offset
128 -> Maybe Limit
129 -> Maybe OrderBy
130 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
131 searchInCorpusWithContacts' cId lId q o l order =
132 runOpaQuery $ queryInCorpusWithContacts cId lId q' o l order
133 where
134 q' = intercalate " | " $ map stemIt q
135
136
137 queryInCorpusWithContacts
138 :: CorpusId
139 -> ListId
140 -> Text
141 -> Maybe Offset
142 -> Maybe Limit
143 -> Maybe OrderBy
144 -> O.Query FacetPairedRead
145 queryInCorpusWithContacts cId lId q _ _ _ = proc () -> do
146 (docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
147 restrict -< (docs^.ns_search) @@ (pgTSQuery $ unpack q )
148 restrict -< (docs^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
149 restrict -< (docNgrams^.nnng_node2_id) .== (toNullable $ pgNodeId lId)
150 restrict -< (corpusDoc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
151 -- restrict -< (nng_listType docNgrams) .== (toNullable $ pgNgramsType Authors)
152 restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
153 -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
154 returnA -< FacetPaired (docs^.ns_id) (docs^.ns_date) (docs^.ns_hyperdata) (pgInt4 0) (contacts^.node_id, ngrams'^.ngrams_terms)
155
156 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
157 , ( NodeNodeReadNull
158 , ( NodeNodeNgramsReadNull
159 , ( NgramsReadNull
160 , ( NodeNodeNgramsReadNull
161 , NodeReadNull
162 )
163 )
164 )
165 )
166 )
167 joinInCorpusWithContacts =
168 leftJoin6
169 queryNodeTable
170 queryNodeNodeNgramsTable
171 queryNgramsTable
172 queryNodeNodeNgramsTable
173 queryNodeNodeTable
174 queryNodeSearchTable
175 cond12
176 cond23
177 cond34
178 cond45
179 cond56
180 where
181 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
182 cond12 (ng3, n2) = n2^.node_id .== ng3^.nnng_node1_id
183
184 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
185 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
186
187 cond34 :: ( NodeNodeNgramsRead
188 , ( NgramsRead
189 , ( NodeNodeNgramsReadNull
190 , NodeReadNull
191 )
192 )
193 ) -> Column PGBool
194 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
195
196 cond45 :: ( NodeNodeRead
197 , ( NodeNodeNgramsRead
198 , ( NgramsReadNull
199 , ( NodeNodeNgramsReadNull
200 , NodeReadNull
201 )
202 )
203 )
204 ) -> Column PGBool
205 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
206
207 cond56 :: ( NodeSearchRead
208 , ( NodeNodeRead
209 , ( NodeNodeNgramsReadNull
210 , ( NgramsReadNull
211 , ( NodeNodeNgramsReadNull
212 , NodeReadNull
213 )
214 )
215 )
216 )
217 ) -> Column PGBool
218 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
219
220
221 newtype TSQuery = UnsafeTSQuery [Text]
222
223 -- | TODO [""] -> panic "error"
224 toTSQuery :: [Text] -> TSQuery
225 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
226
227
228 instance IsString TSQuery
229 where
230 fromString = UnsafeTSQuery . words . cs
231
232
233 instance ToField TSQuery
234 where
235 toField (UnsafeTSQuery xs)
236 = Many $ intersperse (Plain " && ")
237 $ map (\q -> Many [ Plain "plainto_tsquery("
238 , Escape (cs q)
239 , Plain ")"
240 ]
241 ) xs
242
243 data Order = Asc | Desc
244
245 instance ToField Order
246 where
247 toField Asc = Plain "ASC"
248 toField Desc = Plain "DESC"
249
250 -- TODO
251 -- FIX fav
252 -- ADD ngrams count
253 -- TESTS
254 textSearchQuery :: Query
255 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
256 \ , n.hyperdata->'title' \
257 \ , n.hyperdata->'source' \
258 \ , n.hyperdata->'authors' \
259 \ , COALESCE(nn.score,null) \
260 \ FROM nodes n \
261 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
262 \ WHERE \
263 \ n.search @@ (?::tsquery) \
264 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
265 \ AND n.typename = ? \
266 \ ORDER BY n.hyperdata -> 'publication_date' ? \
267 \ offset ? limit ?;"
268
269 -- | Text Search Function for Master Corpus
270 -- TODO : text search for user corpus
271 -- Example:
272 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
273 -- textSearchTest pId q = textSearch q pId 5 0 Asc
274 textSearch :: TSQuery -> ParentId
275 -> Limit -> Offset -> Order
276 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
277 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
278 where
279 typeId = nodeTypeId NodeDocument
280
281