2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
13 module Gargantext.Database.Action.Search where
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
18 import Data.List (intersperse)
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core
26 import Gargantext.Core.Types
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
29 import Gargantext.Database.Query.Facet
30 import Gargantext.Database.Query.Filter
31 import Gargantext.Database.Query.Join (leftJoin5)
32 import Gargantext.Database.Query.Table.Node
33 import Gargantext.Database.Query.Table.NodeNode
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Prelude
36 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
37 import Opaleye hiding (Query, Order)
38 import Data.Profunctor.Product (p4)
39 import qualified Opaleye as O hiding (Order)
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: HasDBid NodeType
45 -> Cmd err [(NodeId, HyperdataDocument)]
46 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
48 -- | Global search query where ParentId is Master Node Corpus Id
49 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
50 queryDocInDatabase _ q = proc () -> do
51 row <- queryNodeSearchTable -< ()
52 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
53 restrict -< (_ns_typename row) .== (pgInt4 $ toDBid NodeDocument)
54 returnA -< (_ns_id row, _ns_hyperdata row)
56 ------------------------------------------------------------------------
57 -- | todo add limit and offset and order
58 searchInCorpus :: HasDBid NodeType
66 searchInCorpus cId t q o l order = runOpaQuery
67 $ filterWith o l order
72 searchCountInCorpus :: HasDBid NodeType
77 searchCountInCorpus cId t q = runCountOpaQuery
82 queryInCorpus :: HasDBid NodeType
86 -> O.Query FacetDocRead
87 queryInCorpus cId t q = proc () -> do
88 (n, nn) <- joinInCorpus -< ()
89 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
91 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
92 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
93 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
94 restrict -< (n ^. ns_typename ) .== (pgInt4 $ toDBid NodeDocument)
95 returnA -< FacetDoc (n^.ns_id )
103 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
104 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
106 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
107 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
109 ------------------------------------------------------------------------
110 searchInCorpusWithContacts
118 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
119 searchInCorpusWithContacts cId aId q o l _order =
120 runOpaQuery $ limit' l
122 $ orderBy ( desc _fp_score)
123 $ selectGroup cId aId
133 ( Column (Nullable PGInt4)
134 , Column (Nullable PGTimestamptz)
135 , Column (Nullable PGJsonb)
136 , Column (Nullable PGInt4)
138 selectContactViaDoc cId aId q = proc () -> do
139 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
140 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
141 restrict -< (doc^.ns_typename) .== (pgInt4 $ toDBid NodeDocument)
142 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
143 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
144 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ toDBid NodeContact)
145 returnA -< ( contact^.node_id
147 , contact^.node_hyperdata
148 , toNullable $ pgInt4 1
151 selectGroup :: HasDBid NodeType
155 -> Select FacetPairedReadNull
156 selectGroup cId aId q = proc () -> do
157 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
158 (selectContactViaDoc cId aId q) -< ()
159 returnA -< FacetPaired a b c d
162 queryContactViaDoc :: O.Query ( NodeSearchRead
183 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
184 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
186 cond23 :: ( NodeNodeRead
191 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
193 cond34 :: ( NodeNodeRead
200 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
203 cond45 :: ( NodeSearchRead
212 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
215 ------------------------------------------------------------------------
217 newtype TSQuery = UnsafeTSQuery [Text]
219 -- | TODO [""] -> panic "error"
220 toTSQuery :: [Text] -> TSQuery
221 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
224 instance IsString TSQuery
226 fromString = UnsafeTSQuery . words . cs
229 instance ToField TSQuery
231 toField (UnsafeTSQuery xs)
232 = Many $ intersperse (Plain " && ")
233 $ map (\q -> Many [ Plain "plainto_tsquery("
239 data Order = Asc | Desc
241 instance ToField Order
243 toField Asc = Plain "ASC"
244 toField Desc = Plain "DESC"
250 textSearchQuery :: Query
251 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
252 \ , n.hyperdata->'title' \
253 \ , n.hyperdata->'source' \
254 \ , n.hyperdata->'authors' \
255 \ , COALESCE(nn.score,null) \
257 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
259 \ n.search @@ (?::tsquery) \
260 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
261 \ AND n.typename = ? \
262 \ ORDER BY n.hyperdata -> 'publication_date' ? \
265 -- | Text Search Function for Master Corpus
266 -- TODO : text search for user corpus
268 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
269 -- textSearchTest pId q = textSearch q pId 5 0 Asc
270 textSearch :: HasDBid NodeType
271 => TSQuery -> ParentId
272 -> Limit -> Offset -> Order
273 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
274 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
276 typeId = toDBid NodeDocument