]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
Merge branch 'dev-kawen' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell-gargantex...
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.Maybe
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core
26 import Gargantext.Core.Types
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
29 import Gargantext.Database.Query.Facet
30 import Gargantext.Database.Query.Filter
31 import Gargantext.Database.Query.Join (leftJoin5)
32 import Gargantext.Database.Query.Table.Node
33 import Gargantext.Database.Query.Table.NodeNode
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Prelude
36 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
37 import Opaleye hiding (Query, Order)
38 import Data.Profunctor.Product (p4)
39 import qualified Opaleye as O hiding (Order)
40
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: HasDBid NodeType
43 => ParentId
44 -> Text
45 -> Cmd err [(NodeId, HyperdataDocument)]
46 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
47 where
48 -- | Global search query where ParentId is Master Node Corpus Id
49 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
50 queryDocInDatabase _ q = proc () -> do
51 row <- queryNodeSearchTable -< ()
52 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
53 restrict -< (_ns_typename row) .== (pgInt4 $ toDBid NodeDocument)
54 returnA -< (_ns_id row, _ns_hyperdata row)
55
56 ------------------------------------------------------------------------
57 -- | todo add limit and offset and order
58 searchInCorpus :: HasDBid NodeType
59 => CorpusId
60 -> IsTrash
61 -> [Text]
62 -> Maybe Offset
63 -> Maybe Limit
64 -> Maybe OrderBy
65 -> Cmd err [FacetDoc]
66 searchInCorpus cId t q o l order = runOpaQuery
67 $ filterWith o l order
68 $ queryInCorpus cId t
69 $ intercalate " | "
70 $ map stemIt q
71
72 searchCountInCorpus :: HasDBid NodeType
73 => CorpusId
74 -> IsTrash
75 -> [Text]
76 -> Cmd err Int
77 searchCountInCorpus cId t q = runCountOpaQuery
78 $ queryInCorpus cId t
79 $ intercalate " | "
80 $ map stemIt q
81
82 queryInCorpus :: HasDBid NodeType
83 => CorpusId
84 -> IsTrash
85 -> Text
86 -> O.Query FacetDocRead
87 queryInCorpus cId t q = proc () -> do
88 (n, nn) <- joinInCorpus -< ()
89 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
90 restrict -< if t
91 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
92 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
93 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
94 restrict -< (n ^. ns_typename ) .== (pgInt4 $ toDBid NodeDocument)
95 returnA -< FacetDoc (n^.ns_id )
96 (n^.ns_date )
97 (n^.ns_name )
98 (n^.ns_hyperdata )
99 (nn^.nn_category )
100 (nn^.nn_score )
101 (nn^.nn_score )
102
103 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
104 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
105 where
106 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
107 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
108
109 ------------------------------------------------------------------------
110 searchInCorpusWithContacts
111 :: HasDBid NodeType
112 => CorpusId
113 -> AnnuaireId
114 -> [Text]
115 -> Maybe Offset
116 -> Maybe Limit
117 -> Maybe OrderBy
118 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
119 searchInCorpusWithContacts cId aId q o l _order =
120 runOpaQuery $ limit' l
121 $ offset' o
122 $ orderBy ( desc _fp_score)
123 $ selectGroup cId aId
124 $ intercalate " | "
125 $ map stemIt q
126
127 selectContactViaDoc
128 :: HasDBid NodeType
129 => CorpusId
130 -> AnnuaireId
131 -> Text
132 -> QueryArr ()
133 ( Column (Nullable PGInt4)
134 , Column (Nullable PGTimestamptz)
135 , Column (Nullable PGJsonb)
136 , Column (Nullable PGInt4)
137 )
138 selectContactViaDoc cId aId q = proc () -> do
139 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
140 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
141 restrict -< (doc^.ns_typename) .== (pgInt4 $ toDBid NodeDocument)
142 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
143 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
144 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ toDBid NodeContact)
145 returnA -< ( contact^.node_id
146 , contact^.node_date
147 , contact^.node_hyperdata
148 , toNullable $ pgInt4 1
149 )
150
151 selectGroup :: HasDBid NodeType
152 => NodeId
153 -> NodeId
154 -> Text
155 -> Select FacetPairedReadNull
156 selectGroup cId aId q = proc () -> do
157 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
158 (selectContactViaDoc cId aId q) -< ()
159 returnA -< FacetPaired a b c d
160
161
162 queryContactViaDoc :: O.Query ( NodeSearchRead
163 , ( NodeNodeReadNull
164 , ( NodeNodeReadNull
165 , ( NodeNodeReadNull
166 , NodeReadNull
167 )
168 )
169 )
170 )
171 queryContactViaDoc =
172 leftJoin5
173 queryNodeTable
174 queryNodeNodeTable
175 queryNodeNodeTable
176 queryNodeNodeTable
177 queryNodeSearchTable
178 cond12
179 cond23
180 cond34
181 cond45
182 where
183 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
184 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
185
186 cond23 :: ( NodeNodeRead
187 , ( NodeNodeRead
188 , NodeReadNull
189 )
190 ) -> Column PGBool
191 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
192
193 cond34 :: ( NodeNodeRead
194 , ( NodeNodeRead
195 , ( NodeNodeReadNull
196 , NodeReadNull
197 )
198 )
199 ) -> Column PGBool
200 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
201
202
203 cond45 :: ( NodeSearchRead
204 , ( NodeNodeRead
205 , ( NodeNodeReadNull
206 , ( NodeNodeReadNull
207 , NodeReadNull
208 )
209 )
210 )
211 ) -> Column PGBool
212 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
213
214
215 ------------------------------------------------------------------------
216
217 newtype TSQuery = UnsafeTSQuery [Text]
218
219 -- | TODO [""] -> panic "error"
220 toTSQuery :: [Text] -> TSQuery
221 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
222
223
224 instance IsString TSQuery
225 where
226 fromString = UnsafeTSQuery . words . cs
227
228
229 instance ToField TSQuery
230 where
231 toField (UnsafeTSQuery xs)
232 = Many $ intersperse (Plain " && ")
233 $ map (\q -> Many [ Plain "plainto_tsquery("
234 , Escape (cs q)
235 , Plain ")"
236 ]
237 ) xs
238
239 data Order = Asc | Desc
240
241 instance ToField Order
242 where
243 toField Asc = Plain "ASC"
244 toField Desc = Plain "DESC"
245
246 -- TODO
247 -- FIX fav
248 -- ADD ngrams count
249 -- TESTS
250 textSearchQuery :: Query
251 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
252 \ , n.hyperdata->'title' \
253 \ , n.hyperdata->'source' \
254 \ , n.hyperdata->'authors' \
255 \ , COALESCE(nn.score,null) \
256 \ FROM nodes n \
257 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
258 \ WHERE \
259 \ n.search @@ (?::tsquery) \
260 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
261 \ AND n.typename = ? \
262 \ ORDER BY n.hyperdata -> 'publication_date' ? \
263 \ offset ? limit ?;"
264
265 -- | Text Search Function for Master Corpus
266 -- TODO : text search for user corpus
267 -- Example:
268 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
269 -- textSearchTest pId q = textSearch q pId 5 0 Asc
270 textSearch :: HasDBid NodeType
271 => TSQuery -> ParentId
272 -> Limit -> Offset -> Order
273 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
274 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
275 where
276 typeId = toDBid NodeDocument
277
278