]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
[Clean]
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.Maybe
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core.Types
26 import Gargantext.Database.Admin.Config (nodeTypeId)
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node (NodeType(..))
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
30 import Gargantext.Database.Query.Facet
31 import Gargantext.Database.Query.Filter
32 import Gargantext.Database.Query.Join (leftJoin5)
33 import Gargantext.Database.Query.Table.Node
34 import Gargantext.Database.Query.Table.NodeNode
35 import Gargantext.Database.Schema.Node
36 import Gargantext.Prelude
37 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
38 import Opaleye hiding (Query, Order)
39 import qualified Opaleye as O hiding (Order)
40
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: ParentId
43 -> Text
44 -> Cmd err [(NodeId, HyperdataDocument)]
45 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
46 where
47 -- | Global search query where ParentId is Master Node Corpus Id
48 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
49 queryDocInDatabase _ q = proc () -> do
50 row <- queryNodeSearchTable -< ()
51 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
52 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
53 returnA -< (_ns_id row, _ns_hyperdata row)
54
55 ------------------------------------------------------------------------
56 -- | todo add limit and offset and order
57 searchInCorpus :: CorpusId
58 -> IsTrash
59 -> [Text]
60 -> Maybe Offset
61 -> Maybe Limit
62 -> Maybe OrderBy
63 -> Cmd err [FacetDoc]
64 searchInCorpus cId t q o l order = runOpaQuery
65 $ filterWith o l order
66 $ queryInCorpus cId t
67 $ intercalate " | "
68 $ map stemIt q
69
70 searchCountInCorpus :: CorpusId
71 -> IsTrash
72 -> [Text]
73 -> Cmd err Int
74 searchCountInCorpus cId t q = runCountOpaQuery
75 $ queryInCorpus cId t
76 $ intercalate " | "
77 $ map stemIt q
78
79 queryInCorpus :: CorpusId
80 -> IsTrash
81 -> Text
82 -> O.Query FacetDocRead
83 queryInCorpus cId t q = proc () -> do
84 (n, nn) <- joinInCorpus -< ()
85 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
86 restrict -< if t
87 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
88 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
89 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
90 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
91 returnA -< FacetDoc (n^.ns_id )
92 (n^.ns_date )
93 (n^.ns_name )
94 (n^.ns_hyperdata)
95 (nn^.nn_category)
96 (nn^.nn_score )
97
98 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
99 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
100 where
101 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
102 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
103
104 ------------------------------------------------------------------------
105 searchInCorpusWithContacts
106 :: CorpusId
107 -> AnnuaireId
108 -> [Text]
109 -> Maybe Offset
110 -> Maybe Limit
111 -> Maybe OrderBy
112 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
113 searchInCorpusWithContacts cId aId q o l _order =
114 runOpaQuery $ limit' l
115 $ offset' o
116 -- $ orderBy ( o l order
117 $ selectContactViaDoc cId aId
118 $ intercalate " | "
119 $ map stemIt q
120
121
122 selectContactViaDoc
123 :: CorpusId
124 -> AnnuaireId
125 -> Text
126 -> O.Query FacetPairedReadNull
127 selectContactViaDoc cId aId q = proc () -> do
128 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
129 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
130 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
131 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
132 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
133 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
134 returnA -< FacetPaired (contact^.node_id)
135 (contact^.node_date)
136 (contact^.node_hyperdata)
137 (toNullable $ pgInt4 0)
138
139
140
141 queryContactViaDoc :: O.Query ( NodeSearchRead
142 , ( NodeNodeReadNull
143 , ( NodeNodeReadNull
144 , ( NodeNodeReadNull
145 , NodeReadNull
146 )
147 )
148 )
149 )
150 queryContactViaDoc =
151 leftJoin5
152 queryNodeTable
153 queryNodeNodeTable
154 queryNodeNodeTable
155 queryNodeNodeTable
156 queryNodeSearchTable
157 cond12
158 cond23
159 cond34
160 cond45
161 where
162 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
163 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
164
165 cond23 :: ( NodeNodeRead
166 , ( NodeNodeRead
167 , NodeReadNull
168 )
169 ) -> Column PGBool
170 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
171
172 cond34 :: ( NodeNodeRead
173 , ( NodeNodeRead
174 , ( NodeNodeReadNull
175 , NodeReadNull
176 )
177 )
178 ) -> Column PGBool
179 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
180
181
182 cond45 :: ( NodeSearchRead
183 , ( NodeNodeRead
184 , ( NodeNodeReadNull
185 , ( NodeNodeReadNull
186 , NodeReadNull
187 )
188 )
189 )
190 ) -> Column PGBool
191 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
192
193
194 ------------------------------------------------------------------------
195
196 newtype TSQuery = UnsafeTSQuery [Text]
197
198 -- | TODO [""] -> panic "error"
199 toTSQuery :: [Text] -> TSQuery
200 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
201
202
203 instance IsString TSQuery
204 where
205 fromString = UnsafeTSQuery . words . cs
206
207
208 instance ToField TSQuery
209 where
210 toField (UnsafeTSQuery xs)
211 = Many $ intersperse (Plain " && ")
212 $ map (\q -> Many [ Plain "plainto_tsquery("
213 , Escape (cs q)
214 , Plain ")"
215 ]
216 ) xs
217
218 data Order = Asc | Desc
219
220 instance ToField Order
221 where
222 toField Asc = Plain "ASC"
223 toField Desc = Plain "DESC"
224
225 -- TODO
226 -- FIX fav
227 -- ADD ngrams count
228 -- TESTS
229 textSearchQuery :: Query
230 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
231 \ , n.hyperdata->'title' \
232 \ , n.hyperdata->'source' \
233 \ , n.hyperdata->'authors' \
234 \ , COALESCE(nn.score,null) \
235 \ FROM nodes n \
236 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
237 \ WHERE \
238 \ n.search @@ (?::tsquery) \
239 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
240 \ AND n.typename = ? \
241 \ ORDER BY n.hyperdata -> 'publication_date' ? \
242 \ offset ? limit ?;"
243
244 -- | Text Search Function for Master Corpus
245 -- TODO : text search for user corpus
246 -- Example:
247 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
248 -- textSearchTest pId q = textSearch q pId 5 0 Asc
249 textSearch :: TSQuery -> ParentId
250 -> Limit -> Offset -> Order
251 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
252 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
253 where
254 typeId = nodeTypeId NodeDocument
255
256