]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
Merge branch '79-dev-rewrite-better-record-syntax' into dev-corpora-from-write-nodes
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.Maybe
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core
26 import Gargantext.Core.Types
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
29 import Gargantext.Database.Query.Facet
30 import Gargantext.Database.Query.Filter
31 import Gargantext.Database.Query.Join (leftJoin5)
32 import Gargantext.Database.Query.Table.Node
33 import Gargantext.Database.Query.Table.NodeNode
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Prelude
36 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
37 import Opaleye hiding (Query, Order)
38 import Data.Profunctor.Product (p4)
39 import qualified Opaleye as O hiding (Order)
40
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: HasDBid NodeType
43 => ParentId
44 -> Text
45 -> Cmd err [(NodeId, HyperdataDocument)]
46 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
47 where
48 -- | Global search query where ParentId is Master Node Corpus Id
49 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
50 queryDocInDatabase _ q = proc () -> do
51 row <- queryNodeSearchTable -< ()
52 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
53 restrict -< (_ns_typename row) .== (pgInt4 $ toDBid NodeDocument)
54 returnA -< (_ns_id row, _ns_hyperdata row)
55
56 ------------------------------------------------------------------------
57 -- | todo add limit and offset and order
58 searchInCorpus :: HasDBid NodeType
59 => CorpusId
60 -> IsTrash
61 -> [Text]
62 -> Maybe Offset
63 -> Maybe Limit
64 -> Maybe OrderBy
65 -> Cmd err [FacetDoc]
66 searchInCorpus cId t q o l order = runOpaQuery
67 $ filterWith o l order
68 $ queryInCorpus cId t
69 $ intercalate " | "
70 $ map stemIt q
71
72 searchCountInCorpus :: HasDBid NodeType
73 => CorpusId
74 -> IsTrash
75 -> [Text]
76 -> Cmd err Int
77 searchCountInCorpus cId t q = runCountOpaQuery
78 $ queryInCorpus cId t
79 $ intercalate " | "
80 $ map stemIt q
81
82 queryInCorpus :: HasDBid NodeType
83 => CorpusId
84 -> IsTrash
85 -> Text
86 -> O.Query FacetDocRead
87 queryInCorpus cId t q = proc () -> do
88 (n, nn) <- joinInCorpus -< ()
89 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
90 restrict -< if t
91 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
92 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
93 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
94 restrict -< (n ^. ns_typename ) .== (pgInt4 $ toDBid NodeDocument)
95 returnA -< FacetDoc { facetDoc_id = n^.ns_id
96 , facetDoc_created = n^.ns_date
97 , facetDoc_title = n^.ns_name
98 , facetDoc_hyperdata = n^.ns_hyperdata
99 , facetDoc_category = nn^.nn_category
100 , facetDoc_ngramCount = nn^.nn_score
101 , facetDoc_score = nn^.nn_score }
102
103 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
104 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
105 where
106 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
107 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
108
109 ------------------------------------------------------------------------
110 searchInCorpusWithContacts
111 :: HasDBid NodeType
112 => CorpusId
113 -> AnnuaireId
114 -> [Text]
115 -> Maybe Offset
116 -> Maybe Limit
117 -> Maybe OrderBy
118 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
119 searchInCorpusWithContacts cId aId q o l _order =
120 runOpaQuery $ limit' l
121 $ offset' o
122 $ orderBy ( desc _fp_score)
123 $ selectGroup cId aId
124 $ intercalate " | "
125 $ map stemIt q
126
127 selectContactViaDoc
128 :: HasDBid NodeType
129 => CorpusId
130 -> AnnuaireId
131 -> Text
132 -> QueryArr ()
133 ( Column (Nullable PGInt4)
134 , Column (Nullable PGTimestamptz)
135 , Column (Nullable PGJsonb)
136 , Column (Nullable PGInt4)
137 )
138 selectContactViaDoc cId aId q = proc () -> do
139 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
140 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
141 restrict -< (doc^.ns_typename) .== (pgInt4 $ toDBid NodeDocument)
142 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
143 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
144 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ toDBid NodeContact)
145 returnA -< ( contact^.node_id
146 , contact^.node_date
147 , contact^.node_hyperdata
148 , toNullable $ pgInt4 1
149 )
150
151 selectGroup :: HasDBid NodeType
152 => NodeId
153 -> NodeId
154 -> Text
155 -> Select FacetPairedReadNull
156 selectGroup cId aId q = proc () -> do
157 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
158 (selectContactViaDoc cId aId q) -< ()
159 returnA -< FacetPaired { _fp_id = a
160 , _fp_date = b
161 , _fp_hyperdata = c
162 , _fp_score = d }
163
164
165 queryContactViaDoc :: O.Query ( NodeSearchRead
166 , ( NodeNodeReadNull
167 , ( NodeNodeReadNull
168 , ( NodeNodeReadNull
169 , NodeReadNull
170 )
171 )
172 )
173 )
174 queryContactViaDoc =
175 leftJoin5
176 queryNodeTable
177 queryNodeNodeTable
178 queryNodeNodeTable
179 queryNodeNodeTable
180 queryNodeSearchTable
181 cond12
182 cond23
183 cond34
184 cond45
185 where
186 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
187 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
188
189 cond23 :: ( NodeNodeRead
190 , ( NodeNodeRead
191 , NodeReadNull
192 )
193 ) -> Column PGBool
194 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
195
196 cond34 :: ( NodeNodeRead
197 , ( NodeNodeRead
198 , ( NodeNodeReadNull
199 , NodeReadNull
200 )
201 )
202 ) -> Column PGBool
203 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
204
205
206 cond45 :: ( NodeSearchRead
207 , ( NodeNodeRead
208 , ( NodeNodeReadNull
209 , ( NodeNodeReadNull
210 , NodeReadNull
211 )
212 )
213 )
214 ) -> Column PGBool
215 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
216
217
218 ------------------------------------------------------------------------
219
220 newtype TSQuery = UnsafeTSQuery [Text]
221
222 -- | TODO [""] -> panic "error"
223 toTSQuery :: [Text] -> TSQuery
224 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
225
226
227 instance IsString TSQuery
228 where
229 fromString = UnsafeTSQuery . words . cs
230
231
232 instance ToField TSQuery
233 where
234 toField (UnsafeTSQuery xs)
235 = Many $ intersperse (Plain " && ")
236 $ map (\q -> Many [ Plain "plainto_tsquery("
237 , Escape (cs q)
238 , Plain ")"
239 ]
240 ) xs
241
242 data Order = Asc | Desc
243
244 instance ToField Order
245 where
246 toField Asc = Plain "ASC"
247 toField Desc = Plain "DESC"
248
249 -- TODO
250 -- FIX fav
251 -- ADD ngrams count
252 -- TESTS
253 textSearchQuery :: Query
254 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
255 \ , n.hyperdata->'title' \
256 \ , n.hyperdata->'source' \
257 \ , n.hyperdata->'authors' \
258 \ , COALESCE(nn.score,null) \
259 \ FROM nodes n \
260 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
261 \ WHERE \
262 \ n.search @@ (?::tsquery) \
263 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
264 \ AND n.typename = ? \
265 \ ORDER BY n.hyperdata -> 'publication_date' ? \
266 \ offset ? limit ?;"
267
268 -- | Text Search Function for Master Corpus
269 -- TODO : text search for user corpus
270 -- Example:
271 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
272 -- textSearchTest pId q = textSearch q pId 5 0 Asc
273 textSearch :: HasDBid NodeType
274 => TSQuery -> ParentId
275 -> Limit -> Offset -> Order
276 -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
277 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
278 where
279 typeId = toDBid NodeDocument
280
281