]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
[ngrams] add r_history %~ mempty to ngrams POST
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.Maybe
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core.Types
26 import Gargantext.Database.Admin.Config (nodeTypeId)
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node (NodeType(..))
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
30 import Gargantext.Database.Query.Facet
31 import Gargantext.Database.Query.Filter
32 import Gargantext.Database.Query.Join (leftJoin5)
33 import Gargantext.Database.Query.Table.Node
34 import Gargantext.Database.Query.Table.NodeNode
35 import Gargantext.Database.Schema.Node
36 import Gargantext.Prelude
37 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
38 import Opaleye hiding (Query, Order)
39 import Data.Profunctor.Product (p4)
40 import qualified Opaleye as O hiding (Order)
41
42 ------------------------------------------------------------------------
43 searchDocInDatabase :: ParentId
44 -> Text
45 -> Cmd err [(NodeId, HyperdataDocument)]
46 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
47 where
48 -- | Global search query where ParentId is Master Node Corpus Id
49 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
50 queryDocInDatabase _ q = proc () -> do
51 row <- queryNodeSearchTable -< ()
52 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
53 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
54 returnA -< (_ns_id row, _ns_hyperdata row)
55
56 ------------------------------------------------------------------------
57 -- | todo add limit and offset and order
58 searchInCorpus :: CorpusId
59 -> IsTrash
60 -> [Text]
61 -> Maybe Offset
62 -> Maybe Limit
63 -> Maybe OrderBy
64 -> Cmd err [FacetDoc]
65 searchInCorpus cId t q o l order = runOpaQuery
66 $ filterWith o l order
67 $ queryInCorpus cId t
68 $ intercalate " | "
69 $ map stemIt q
70
71 searchCountInCorpus :: CorpusId
72 -> IsTrash
73 -> [Text]
74 -> Cmd err Int
75 searchCountInCorpus cId t q = runCountOpaQuery
76 $ queryInCorpus cId t
77 $ intercalate " | "
78 $ map stemIt q
79
80 queryInCorpus :: CorpusId
81 -> IsTrash
82 -> Text
83 -> O.Query FacetDocRead
84 queryInCorpus cId t q = proc () -> do
85 (n, nn) <- joinInCorpus -< ()
86 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
87 restrict -< if t
88 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
89 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
90 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
91 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
92 returnA -< FacetDoc (n^.ns_id )
93 (n^.ns_date )
94 (n^.ns_name )
95 (n^.ns_hyperdata)
96 (nn^.nn_category)
97 (nn^.nn_score )
98
99 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
100 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
101 where
102 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
103 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
104
105 ------------------------------------------------------------------------
106 searchInCorpusWithContacts
107 :: CorpusId
108 -> AnnuaireId
109 -> [Text]
110 -> Maybe Offset
111 -> Maybe Limit
112 -> Maybe OrderBy
113 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
114 searchInCorpusWithContacts cId aId q o l _order =
115 runOpaQuery $ limit' l
116 $ offset' o
117 $ orderBy ( desc _fp_score)
118 $ group cId aId
119 $ intercalate " | "
120 $ map stemIt q
121
122 -- TODO group by
123 selectContactViaDoc
124 :: CorpusId
125 -> AnnuaireId
126 -> Text
127 -> Select FacetPairedReadNull
128 selectContactViaDoc cId aId q = proc () -> do
129 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
130 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
131 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
132 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
133 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
134 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
135 returnA -< FacetPaired (contact^.node_id)
136 (contact^.node_date)
137 (contact^.node_hyperdata)
138 (toNullable $ pgInt4 1)
139
140
141 selectContactViaDoc'
142 :: CorpusId
143 -> AnnuaireId
144 -> Text
145 -> QueryArr ()
146 ( Column (Nullable PGInt4)
147 , Column (Nullable PGTimestamptz)
148 , Column (Nullable PGJsonb)
149 , Column (Nullable PGInt4)
150 )
151 selectContactViaDoc' cId aId q = proc () -> do
152 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
153 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
154 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
155 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
156 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
157 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
158 returnA -< ( contact^.node_id
159 , contact^.node_date
160 , contact^.node_hyperdata
161 , toNullable $ pgInt4 1
162 )
163
164 group :: NodeId
165 -> NodeId
166 -> Text
167 -> Select FacetPairedReadNull
168 group cId aId q = proc () -> do
169 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
170 (selectContactViaDoc' cId aId q) -< ()
171 returnA -< FacetPaired a b c d
172
173
174
175
176
177
178
179
180 queryContactViaDoc :: O.Query ( NodeSearchRead
181 , ( NodeNodeReadNull
182 , ( NodeNodeReadNull
183 , ( NodeNodeReadNull
184 , NodeReadNull
185 )
186 )
187 )
188 )
189 queryContactViaDoc =
190 leftJoin5
191 queryNodeTable
192 queryNodeNodeTable
193 queryNodeNodeTable
194 queryNodeNodeTable
195 queryNodeSearchTable
196 cond12
197 cond23
198 cond34
199 cond45
200 where
201 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
202 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
203
204 cond23 :: ( NodeNodeRead
205 , ( NodeNodeRead
206 , NodeReadNull
207 )
208 ) -> Column PGBool
209 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
210
211 cond34 :: ( NodeNodeRead
212 , ( NodeNodeRead
213 , ( NodeNodeReadNull
214 , NodeReadNull
215 )
216 )
217 ) -> Column PGBool
218 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
219
220
221 cond45 :: ( NodeSearchRead
222 , ( NodeNodeRead
223 , ( NodeNodeReadNull
224 , ( NodeNodeReadNull
225 , NodeReadNull
226 )
227 )
228 )
229 ) -> Column PGBool
230 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
231
232
233 ------------------------------------------------------------------------
234
235 newtype TSQuery = UnsafeTSQuery [Text]
236
237 -- | TODO [""] -> panic "error"
238 toTSQuery :: [Text] -> TSQuery
239 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
240
241
242 instance IsString TSQuery
243 where
244 fromString = UnsafeTSQuery . words . cs
245
246
247 instance ToField TSQuery
248 where
249 toField (UnsafeTSQuery xs)
250 = Many $ intersperse (Plain " && ")
251 $ map (\q -> Many [ Plain "plainto_tsquery("
252 , Escape (cs q)
253 , Plain ")"
254 ]
255 ) xs
256
257 data Order = Asc | Desc
258
259 instance ToField Order
260 where
261 toField Asc = Plain "ASC"
262 toField Desc = Plain "DESC"
263
264 -- TODO
265 -- FIX fav
266 -- ADD ngrams count
267 -- TESTS
268 textSearchQuery :: Query
269 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
270 \ , n.hyperdata->'title' \
271 \ , n.hyperdata->'source' \
272 \ , n.hyperdata->'authors' \
273 \ , COALESCE(nn.score,null) \
274 \ FROM nodes n \
275 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
276 \ WHERE \
277 \ n.search @@ (?::tsquery) \
278 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
279 \ AND n.typename = ? \
280 \ ORDER BY n.hyperdata -> 'publication_date' ? \
281 \ offset ? limit ?;"
282
283 -- | Text Search Function for Master Corpus
284 -- TODO : text search for user corpus
285 -- Example:
286 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
287 -- textSearchTest pId q = textSearch q pId 5 0 Asc
288 textSearch :: TSQuery -> ParentId
289 -> Limit -> Offset -> Order
290 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
291 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
292 where
293 typeId = nodeTypeId NodeDocument
294
295