]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
[FIX] Map Text -> HashMap NgramsTerm
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.Maybe
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core.Types
26 import Gargantext.Database.Admin.Config (nodeTypeId)
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
29 import Gargantext.Database.Query.Facet
30 import Gargantext.Database.Query.Filter
31 import Gargantext.Database.Query.Join (leftJoin5)
32 import Gargantext.Database.Query.Table.Node
33 import Gargantext.Database.Query.Table.NodeNode
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Prelude
36 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
37 import Opaleye hiding (Query, Order)
38 import Data.Profunctor.Product (p4)
39 import qualified Opaleye as O hiding (Order)
40
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: ParentId
43 -> Text
44 -> Cmd err [(NodeId, HyperdataDocument)]
45 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
46 where
47 -- | Global search query where ParentId is Master Node Corpus Id
48 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
49 queryDocInDatabase _ q = proc () -> do
50 row <- queryNodeSearchTable -< ()
51 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
52 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
53 returnA -< (_ns_id row, _ns_hyperdata row)
54
55 ------------------------------------------------------------------------
56 -- | todo add limit and offset and order
57 searchInCorpus :: CorpusId
58 -> IsTrash
59 -> [Text]
60 -> Maybe Offset
61 -> Maybe Limit
62 -> Maybe OrderBy
63 -> Cmd err [FacetDoc]
64 searchInCorpus cId t q o l order = runOpaQuery
65 $ filterWith o l order
66 $ queryInCorpus cId t
67 $ intercalate " | "
68 $ map stemIt q
69
70 searchCountInCorpus :: CorpusId
71 -> IsTrash
72 -> [Text]
73 -> Cmd err Int
74 searchCountInCorpus cId t q = runCountOpaQuery
75 $ queryInCorpus cId t
76 $ intercalate " | "
77 $ map stemIt q
78
79 queryInCorpus :: CorpusId
80 -> IsTrash
81 -> Text
82 -> O.Query FacetDocRead
83 queryInCorpus cId t q = proc () -> do
84 (n, nn) <- joinInCorpus -< ()
85 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
86 restrict -< if t
87 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
88 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
89 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
90 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
91 returnA -< FacetDoc (n^.ns_id )
92 (n^.ns_date )
93 (n^.ns_name )
94 (n^.ns_hyperdata )
95 (nn^.nn_category )
96 (nn^.nn_score )
97 (nn^.nn_score )
98
99 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
100 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
101 where
102 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
103 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
104
105 ------------------------------------------------------------------------
106 searchInCorpusWithContacts
107 :: CorpusId
108 -> AnnuaireId
109 -> [Text]
110 -> Maybe Offset
111 -> Maybe Limit
112 -> Maybe OrderBy
113 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
114 searchInCorpusWithContacts cId aId q o l _order =
115 runOpaQuery $ limit' l
116 $ offset' o
117 $ orderBy ( desc _fp_score)
118 $ selectGroup cId aId
119 $ intercalate " | "
120 $ map stemIt q
121
122 selectContactViaDoc
123 :: CorpusId
124 -> AnnuaireId
125 -> Text
126 -> QueryArr ()
127 ( Column (Nullable PGInt4)
128 , Column (Nullable PGTimestamptz)
129 , Column (Nullable PGJsonb)
130 , Column (Nullable PGInt4)
131 )
132 selectContactViaDoc cId aId q = proc () -> do
133 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
134 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
135 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
136 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
137 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
138 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
139 returnA -< ( contact^.node_id
140 , contact^.node_date
141 , contact^.node_hyperdata
142 , toNullable $ pgInt4 1
143 )
144
145 selectGroup :: NodeId
146 -> NodeId
147 -> Text
148 -> Select FacetPairedReadNull
149 selectGroup cId aId q = proc () -> do
150 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
151 (selectContactViaDoc cId aId q) -< ()
152 returnA -< FacetPaired a b c d
153
154
155 queryContactViaDoc :: O.Query ( NodeSearchRead
156 , ( NodeNodeReadNull
157 , ( NodeNodeReadNull
158 , ( NodeNodeReadNull
159 , NodeReadNull
160 )
161 )
162 )
163 )
164 queryContactViaDoc =
165 leftJoin5
166 queryNodeTable
167 queryNodeNodeTable
168 queryNodeNodeTable
169 queryNodeNodeTable
170 queryNodeSearchTable
171 cond12
172 cond23
173 cond34
174 cond45
175 where
176 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
177 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
178
179 cond23 :: ( NodeNodeRead
180 , ( NodeNodeRead
181 , NodeReadNull
182 )
183 ) -> Column PGBool
184 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
185
186 cond34 :: ( NodeNodeRead
187 , ( NodeNodeRead
188 , ( NodeNodeReadNull
189 , NodeReadNull
190 )
191 )
192 ) -> Column PGBool
193 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
194
195
196 cond45 :: ( NodeSearchRead
197 , ( NodeNodeRead
198 , ( NodeNodeReadNull
199 , ( NodeNodeReadNull
200 , NodeReadNull
201 )
202 )
203 )
204 ) -> Column PGBool
205 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
206
207
208 ------------------------------------------------------------------------
209
210 newtype TSQuery = UnsafeTSQuery [Text]
211
212 -- | TODO [""] -> panic "error"
213 toTSQuery :: [Text] -> TSQuery
214 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
215
216
217 instance IsString TSQuery
218 where
219 fromString = UnsafeTSQuery . words . cs
220
221
222 instance ToField TSQuery
223 where
224 toField (UnsafeTSQuery xs)
225 = Many $ intersperse (Plain " && ")
226 $ map (\q -> Many [ Plain "plainto_tsquery("
227 , Escape (cs q)
228 , Plain ")"
229 ]
230 ) xs
231
232 data Order = Asc | Desc
233
234 instance ToField Order
235 where
236 toField Asc = Plain "ASC"
237 toField Desc = Plain "DESC"
238
239 -- TODO
240 -- FIX fav
241 -- ADD ngrams count
242 -- TESTS
243 textSearchQuery :: Query
244 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
245 \ , n.hyperdata->'title' \
246 \ , n.hyperdata->'source' \
247 \ , n.hyperdata->'authors' \
248 \ , COALESCE(nn.score,null) \
249 \ FROM nodes n \
250 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
251 \ WHERE \
252 \ n.search @@ (?::tsquery) \
253 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
254 \ AND n.typename = ? \
255 \ ORDER BY n.hyperdata -> 'publication_date' ? \
256 \ offset ? limit ?;"
257
258 -- | Text Search Function for Master Corpus
259 -- TODO : text search for user corpus
260 -- Example:
261 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
262 -- textSearchTest pId q = textSearch q pId 5 0 Asc
263 textSearch :: TSQuery -> ParentId
264 -> Limit -> Offset -> Order
265 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
266 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
267 where
268 typeId = nodeTypeId NodeDocument
269
270