]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
[Pairing] clean code
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.Maybe
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core.Types
26 import Gargantext.Database.Admin.Config (nodeTypeId)
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node (NodeType(..))
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
30 import Gargantext.Database.Query.Facet
31 import Gargantext.Database.Query.Filter
32 import Gargantext.Database.Query.Join (leftJoin5)
33 import Gargantext.Database.Query.Table.Node
34 import Gargantext.Database.Query.Table.NodeNode
35 import Gargantext.Database.Schema.Node
36 import Gargantext.Prelude
37 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
38 import Opaleye hiding (Query, Order)
39 import qualified Opaleye as O hiding (Order)
40
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: ParentId
43 -> Text
44 -> Cmd err [(NodeId, HyperdataDocument)]
45 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
46 where
47 -- | Global search query where ParentId is Master Node Corpus Id
48 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
49 queryDocInDatabase _ q = proc () -> do
50 row <- queryNodeSearchTable -< ()
51 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
52 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
53 returnA -< (_ns_id row, _ns_hyperdata row)
54
55 ------------------------------------------------------------------------
56 -- | todo add limit and offset and order
57 searchInCorpus :: CorpusId
58 -> IsTrash
59 -> [Text]
60 -> Maybe Offset
61 -> Maybe Limit
62 -> Maybe OrderBy
63 -> Cmd err [FacetDoc]
64 searchInCorpus cId t q o l order = runOpaQuery
65 $ filterWith o l order
66 $ queryInCorpus cId t
67 $ intercalate " | "
68 $ map stemIt q
69
70 searchCountInCorpus :: CorpusId
71 -> IsTrash
72 -> [Text]
73 -> Cmd err Int
74 searchCountInCorpus cId t q = runCountOpaQuery
75 $ queryInCorpus cId t
76 $ intercalate " | "
77 $ map stemIt q
78
79 queryInCorpus :: CorpusId
80 -> IsTrash
81 -> Text
82 -> O.Query FacetDocRead
83 queryInCorpus cId t q = proc () -> do
84 (n, nn) <- joinInCorpus -< ()
85 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
86 restrict -< if t
87 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
88 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
89 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
90 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
91 returnA -< FacetDoc (n^.ns_id )
92 (n^.ns_date )
93 (n^.ns_name )
94 (n^.ns_hyperdata)
95 (nn^.nn_category)
96 (nn^.nn_score )
97
98 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
99 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
100 where
101 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
102 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
103
104 ------------------------------------------------------------------------
105 searchInCorpusWithContacts
106 :: CorpusId
107 -> AnnuaireId
108 -> [Text]
109 -> Maybe Offset
110 -> Maybe Limit
111 -> Maybe OrderBy
112 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
113 searchInCorpusWithContacts cId aId q o l _order =
114 runOpaQuery $ limit' l
115 $ offset' o
116 -- $ orderBy ( o l order
117 $ selectContactViaDoc cId aId
118 $ intercalate " | "
119 $ map stemIt q
120
121
122 selectContactViaDoc
123 :: CorpusId
124 -> AnnuaireId
125 -> Text
126 -> O.Query FacetPairedReadNull
127 selectContactViaDoc cId aId q = proc () -> do
128 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
129 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
130 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
131 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
132 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
133 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
134 returnA -< FacetPaired (contact^.node_id)
135 (contact^.node_date)
136 (contact^.node_hyperdata)
137 (toNullable $ pgInt4 0)
138
139 queryContactViaDoc :: O.Query ( NodeSearchRead
140 , ( NodeNodeReadNull
141 , ( NodeNodeReadNull
142 , ( NodeNodeReadNull
143 , NodeReadNull
144 )
145 )
146 )
147 )
148 queryContactViaDoc =
149 leftJoin5
150 queryNodeTable
151 queryNodeNodeTable
152 queryNodeNodeTable
153 queryNodeNodeTable
154 queryNodeSearchTable
155 cond12
156 cond23
157 cond34
158 cond45
159 where
160 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
161 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
162
163 cond23 :: ( NodeNodeRead
164 , ( NodeNodeRead
165 , NodeReadNull
166 )
167 ) -> Column PGBool
168 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
169
170 cond34 :: ( NodeNodeRead
171 , ( NodeNodeRead
172 , ( NodeNodeReadNull
173 , NodeReadNull
174 )
175 )
176 ) -> Column PGBool
177 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
178
179
180 cond45 :: ( NodeSearchRead
181 , ( NodeNodeRead
182 , ( NodeNodeReadNull
183 , ( NodeNodeReadNull
184 , NodeReadNull
185 )
186 )
187 )
188 ) -> Column PGBool
189 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
190
191
192 ------------------------------------------------------------------------
193
194 newtype TSQuery = UnsafeTSQuery [Text]
195
196 -- | TODO [""] -> panic "error"
197 toTSQuery :: [Text] -> TSQuery
198 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
199
200
201 instance IsString TSQuery
202 where
203 fromString = UnsafeTSQuery . words . cs
204
205
206 instance ToField TSQuery
207 where
208 toField (UnsafeTSQuery xs)
209 = Many $ intersperse (Plain " && ")
210 $ map (\q -> Many [ Plain "plainto_tsquery("
211 , Escape (cs q)
212 , Plain ")"
213 ]
214 ) xs
215
216 data Order = Asc | Desc
217
218 instance ToField Order
219 where
220 toField Asc = Plain "ASC"
221 toField Desc = Plain "DESC"
222
223 -- TODO
224 -- FIX fav
225 -- ADD ngrams count
226 -- TESTS
227 textSearchQuery :: Query
228 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
229 \ , n.hyperdata->'title' \
230 \ , n.hyperdata->'source' \
231 \ , n.hyperdata->'authors' \
232 \ , COALESCE(nn.score,null) \
233 \ FROM nodes n \
234 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
235 \ WHERE \
236 \ n.search @@ (?::tsquery) \
237 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
238 \ AND n.typename = ? \
239 \ ORDER BY n.hyperdata -> 'publication_date' ? \
240 \ offset ? limit ?;"
241
242 -- | Text Search Function for Master Corpus
243 -- TODO : text search for user corpus
244 -- Example:
245 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
246 -- textSearchTest pId q = textSearch q pId 5 0 Asc
247 textSearch :: TSQuery -> ParentId
248 -> Limit -> Offset -> Order
249 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
250 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
251 where
252 typeId = nodeTypeId NodeDocument
253
254