]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
[TextFlow] SpeGen scores (WIP)
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.Maybe
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core.Types
26 import Gargantext.Database.Admin.Config (nodeTypeId)
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
29 import Gargantext.Database.Query.Facet
30 import Gargantext.Database.Query.Filter
31 import Gargantext.Database.Query.Join (leftJoin5)
32 import Gargantext.Database.Query.Table.Node
33 import Gargantext.Database.Query.Table.NodeNode
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Prelude
36 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
37 import Opaleye hiding (Query, Order)
38 import Data.Profunctor.Product (p4)
39 import qualified Opaleye as O hiding (Order)
40
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: ParentId
43 -> Text
44 -> Cmd err [(NodeId, HyperdataDocument)]
45 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
46 where
47 -- | Global search query where ParentId is Master Node Corpus Id
48 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
49 queryDocInDatabase _ q = proc () -> do
50 row <- queryNodeSearchTable -< ()
51 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
52 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
53 returnA -< (_ns_id row, _ns_hyperdata row)
54
55 ------------------------------------------------------------------------
56 -- | todo add limit and offset and order
57 searchInCorpus :: CorpusId
58 -> IsTrash
59 -> [Text]
60 -> Maybe Offset
61 -> Maybe Limit
62 -> Maybe OrderBy
63 -> Cmd err [FacetDoc]
64 searchInCorpus cId t q o l order = runOpaQuery
65 $ filterWith o l order
66 $ queryInCorpus cId t
67 $ intercalate " | "
68 $ map stemIt q
69
70 searchCountInCorpus :: CorpusId
71 -> IsTrash
72 -> [Text]
73 -> Cmd err Int
74 searchCountInCorpus cId t q = runCountOpaQuery
75 $ queryInCorpus cId t
76 $ intercalate " | "
77 $ map stemIt q
78
79 queryInCorpus :: CorpusId
80 -> IsTrash
81 -> Text
82 -> O.Query FacetDocRead
83 queryInCorpus cId t q = proc () -> do
84 (n, nn) <- joinInCorpus -< ()
85 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
86 restrict -< if t
87 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
88 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
89 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
90 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
91 returnA -< FacetDoc (n^.ns_id )
92 (n^.ns_date )
93 (n^.ns_name )
94 (n^.ns_hyperdata)
95 (nn^.nn_category)
96 (nn^.nn_score )
97
98 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
99 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
100 where
101 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
102 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
103
104 ------------------------------------------------------------------------
105 searchInCorpusWithContacts
106 :: CorpusId
107 -> AnnuaireId
108 -> [Text]
109 -> Maybe Offset
110 -> Maybe Limit
111 -> Maybe OrderBy
112 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
113 searchInCorpusWithContacts cId aId q o l _order =
114 runOpaQuery $ limit' l
115 $ offset' o
116 $ orderBy ( desc _fp_score)
117 $ selectGroup cId aId
118 $ intercalate " | "
119 $ map stemIt q
120
121 selectContactViaDoc
122 :: CorpusId
123 -> AnnuaireId
124 -> Text
125 -> QueryArr ()
126 ( Column (Nullable PGInt4)
127 , Column (Nullable PGTimestamptz)
128 , Column (Nullable PGJsonb)
129 , Column (Nullable PGInt4)
130 )
131 selectContactViaDoc cId aId q = proc () -> do
132 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
133 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
134 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
135 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
136 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
137 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
138 returnA -< ( contact^.node_id
139 , contact^.node_date
140 , contact^.node_hyperdata
141 , toNullable $ pgInt4 1
142 )
143
144 selectGroup :: NodeId
145 -> NodeId
146 -> Text
147 -> Select FacetPairedReadNull
148 selectGroup cId aId q = proc () -> do
149 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
150 (selectContactViaDoc cId aId q) -< ()
151 returnA -< FacetPaired a b c d
152
153
154 queryContactViaDoc :: O.Query ( NodeSearchRead
155 , ( NodeNodeReadNull
156 , ( NodeNodeReadNull
157 , ( NodeNodeReadNull
158 , NodeReadNull
159 )
160 )
161 )
162 )
163 queryContactViaDoc =
164 leftJoin5
165 queryNodeTable
166 queryNodeNodeTable
167 queryNodeNodeTable
168 queryNodeNodeTable
169 queryNodeSearchTable
170 cond12
171 cond23
172 cond34
173 cond45
174 where
175 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
176 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
177
178 cond23 :: ( NodeNodeRead
179 , ( NodeNodeRead
180 , NodeReadNull
181 )
182 ) -> Column PGBool
183 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
184
185 cond34 :: ( NodeNodeRead
186 , ( NodeNodeRead
187 , ( NodeNodeReadNull
188 , NodeReadNull
189 )
190 )
191 ) -> Column PGBool
192 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
193
194
195 cond45 :: ( NodeSearchRead
196 , ( NodeNodeRead
197 , ( NodeNodeReadNull
198 , ( NodeNodeReadNull
199 , NodeReadNull
200 )
201 )
202 )
203 ) -> Column PGBool
204 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
205
206
207 ------------------------------------------------------------------------
208
209 newtype TSQuery = UnsafeTSQuery [Text]
210
211 -- | TODO [""] -> panic "error"
212 toTSQuery :: [Text] -> TSQuery
213 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
214
215
216 instance IsString TSQuery
217 where
218 fromString = UnsafeTSQuery . words . cs
219
220
221 instance ToField TSQuery
222 where
223 toField (UnsafeTSQuery xs)
224 = Many $ intersperse (Plain " && ")
225 $ map (\q -> Many [ Plain "plainto_tsquery("
226 , Escape (cs q)
227 , Plain ")"
228 ]
229 ) xs
230
231 data Order = Asc | Desc
232
233 instance ToField Order
234 where
235 toField Asc = Plain "ASC"
236 toField Desc = Plain "DESC"
237
238 -- TODO
239 -- FIX fav
240 -- ADD ngrams count
241 -- TESTS
242 textSearchQuery :: Query
243 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
244 \ , n.hyperdata->'title' \
245 \ , n.hyperdata->'source' \
246 \ , n.hyperdata->'authors' \
247 \ , COALESCE(nn.score,null) \
248 \ FROM nodes n \
249 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
250 \ WHERE \
251 \ n.search @@ (?::tsquery) \
252 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
253 \ AND n.typename = ? \
254 \ ORDER BY n.hyperdata -> 'publication_date' ? \
255 \ offset ? limit ?;"
256
257 -- | Text Search Function for Master Corpus
258 -- TODO : text search for user corpus
259 -- Example:
260 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
261 -- textSearchTest pId q = textSearch q pId 5 0 Asc
262 textSearch :: TSQuery -> ParentId
263 -> Limit -> Offset -> Order
264 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
265 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
266 where
267 typeId = nodeTypeId NodeDocument
268
269