]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
Merge branch 'dev' into dev-doc-annotation-issue
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.Maybe
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core.Types
26 import Gargantext.Database.Admin.Config (nodeTypeId)
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node (NodeType(..))
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
30 import Gargantext.Database.Query.Facet
31 import Gargantext.Database.Query.Filter
32 import Gargantext.Database.Query.Join (leftJoin5)
33 import Gargantext.Database.Query.Table.Node
34 import Gargantext.Database.Query.Table.NodeNode
35 import Gargantext.Database.Schema.Node
36 import Gargantext.Prelude
37 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
38 import Opaleye hiding (Query, Order)
39 import Data.Profunctor.Product (p4)
40 import qualified Opaleye as O hiding (Order)
41
42 ------------------------------------------------------------------------
43 searchDocInDatabase :: ParentId
44 -> Text
45 -> Cmd err [(NodeId, HyperdataDocument)]
46 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
47 where
48 -- | Global search query where ParentId is Master Node Corpus Id
49 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
50 queryDocInDatabase _ q = proc () -> do
51 row <- queryNodeSearchTable -< ()
52 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
53 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
54 returnA -< (_ns_id row, _ns_hyperdata row)
55
56 ------------------------------------------------------------------------
57 -- | todo add limit and offset and order
58 searchInCorpus :: CorpusId
59 -> IsTrash
60 -> [Text]
61 -> Maybe Offset
62 -> Maybe Limit
63 -> Maybe OrderBy
64 -> Cmd err [FacetDoc]
65 searchInCorpus cId t q o l order = runOpaQuery
66 $ filterWith o l order
67 $ queryInCorpus cId t
68 $ intercalate " | "
69 $ map stemIt q
70
71 searchCountInCorpus :: CorpusId
72 -> IsTrash
73 -> [Text]
74 -> Cmd err Int
75 searchCountInCorpus cId t q = runCountOpaQuery
76 $ queryInCorpus cId t
77 $ intercalate " | "
78 $ map stemIt q
79
80 queryInCorpus :: CorpusId
81 -> IsTrash
82 -> Text
83 -> O.Query FacetDocRead
84 queryInCorpus cId t q = proc () -> do
85 (n, nn) <- joinInCorpus -< ()
86 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
87 restrict -< if t
88 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
89 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
90 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
91 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
92 returnA -< FacetDoc (n^.ns_id )
93 (n^.ns_date )
94 (n^.ns_name )
95 (n^.ns_hyperdata)
96 (nn^.nn_category)
97 (nn^.nn_score )
98
99 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
100 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
101 where
102 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
103 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
104
105 ------------------------------------------------------------------------
106 searchInCorpusWithContacts
107 :: CorpusId
108 -> AnnuaireId
109 -> [Text]
110 -> Maybe Offset
111 -> Maybe Limit
112 -> Maybe OrderBy
113 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
114 searchInCorpusWithContacts cId aId q o l _order =
115 runOpaQuery $ limit' l
116 $ offset' o
117 $ orderBy ( desc _fp_score)
118 $ selectGroup cId aId
119 $ intercalate " | "
120 $ map stemIt q
121
122 selectContactViaDoc
123 :: CorpusId
124 -> AnnuaireId
125 -> Text
126 -> QueryArr ()
127 ( Column (Nullable PGInt4)
128 , Column (Nullable PGTimestamptz)
129 , Column (Nullable PGJsonb)
130 , Column (Nullable PGInt4)
131 )
132 selectContactViaDoc cId aId q = proc () -> do
133 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
134 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
135 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
136 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
137 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
138 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
139 returnA -< ( contact^.node_id
140 , contact^.node_date
141 , contact^.node_hyperdata
142 , toNullable $ pgInt4 1
143 )
144
145 selectGroup :: NodeId
146 -> NodeId
147 -> Text
148 -> Select FacetPairedReadNull
149 selectGroup cId aId q = proc () -> do
150 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
151 (selectContactViaDoc cId aId q) -< ()
152 returnA -< FacetPaired a b c d
153
154
155
156
157
158
159
160
161 queryContactViaDoc :: O.Query ( NodeSearchRead
162 , ( NodeNodeReadNull
163 , ( NodeNodeReadNull
164 , ( NodeNodeReadNull
165 , NodeReadNull
166 )
167 )
168 )
169 )
170 queryContactViaDoc =
171 leftJoin5
172 queryNodeTable
173 queryNodeNodeTable
174 queryNodeNodeTable
175 queryNodeNodeTable
176 queryNodeSearchTable
177 cond12
178 cond23
179 cond34
180 cond45
181 where
182 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
183 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
184
185 cond23 :: ( NodeNodeRead
186 , ( NodeNodeRead
187 , NodeReadNull
188 )
189 ) -> Column PGBool
190 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
191
192 cond34 :: ( NodeNodeRead
193 , ( NodeNodeRead
194 , ( NodeNodeReadNull
195 , NodeReadNull
196 )
197 )
198 ) -> Column PGBool
199 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
200
201
202 cond45 :: ( NodeSearchRead
203 , ( NodeNodeRead
204 , ( NodeNodeReadNull
205 , ( NodeNodeReadNull
206 , NodeReadNull
207 )
208 )
209 )
210 ) -> Column PGBool
211 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
212
213
214 ------------------------------------------------------------------------
215
216 newtype TSQuery = UnsafeTSQuery [Text]
217
218 -- | TODO [""] -> panic "error"
219 toTSQuery :: [Text] -> TSQuery
220 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
221
222
223 instance IsString TSQuery
224 where
225 fromString = UnsafeTSQuery . words . cs
226
227
228 instance ToField TSQuery
229 where
230 toField (UnsafeTSQuery xs)
231 = Many $ intersperse (Plain " && ")
232 $ map (\q -> Many [ Plain "plainto_tsquery("
233 , Escape (cs q)
234 , Plain ")"
235 ]
236 ) xs
237
238 data Order = Asc | Desc
239
240 instance ToField Order
241 where
242 toField Asc = Plain "ASC"
243 toField Desc = Plain "DESC"
244
245 -- TODO
246 -- FIX fav
247 -- ADD ngrams count
248 -- TESTS
249 textSearchQuery :: Query
250 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
251 \ , n.hyperdata->'title' \
252 \ , n.hyperdata->'source' \
253 \ , n.hyperdata->'authors' \
254 \ , COALESCE(nn.score,null) \
255 \ FROM nodes n \
256 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
257 \ WHERE \
258 \ n.search @@ (?::tsquery) \
259 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
260 \ AND n.typename = ? \
261 \ ORDER BY n.hyperdata -> 'publication_date' ? \
262 \ offset ? limit ?;"
263
264 -- | Text Search Function for Master Corpus
265 -- TODO : text search for user corpus
266 -- Example:
267 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
268 -- textSearchTest pId q = textSearch q pId 5 0 Asc
269 textSearch :: TSQuery -> ParentId
270 -> Limit -> Offset -> Order
271 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
272 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
273 where
274 typeId = nodeTypeId NodeDocument
275
276