2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
13 module Gargantext.Database.Action.Search where
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
18 import Data.List (intersperse)
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core
26 import Gargantext.Core.Types
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
29 import Gargantext.Database.Query.Facet
30 import Gargantext.Database.Query.Filter
31 import Gargantext.Database.Query.Join (leftJoin5)
32 import Gargantext.Database.Query.Table.Node
33 import Gargantext.Database.Query.Table.NodeNode
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Prelude
36 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
37 import Opaleye hiding (Query, Order)
38 import Data.Profunctor.Product (p4)
39 import qualified Opaleye as O hiding (Order)
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: HasDBid NodeType
45 -> Cmd err [(NodeId, HyperdataDocument)]
46 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
48 -- | Global search query where ParentId is Master Node Corpus Id
49 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
50 queryDocInDatabase _ q = proc () -> do
51 row <- queryNodeSearchTable -< ()
52 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
53 restrict -< (_ns_typename row) .== (pgInt4 $ toDBid NodeDocument)
54 returnA -< (_ns_id row, _ns_hyperdata row)
56 ------------------------------------------------------------------------
57 -- | todo add limit and offset and order
58 searchInCorpus :: HasDBid NodeType
66 searchInCorpus cId t q o l order = runOpaQuery
67 $ filterWith o l order
72 searchCountInCorpus :: HasDBid NodeType
77 searchCountInCorpus cId t q = runCountOpaQuery
82 queryInCorpus :: HasDBid NodeType
86 -> O.Query FacetDocRead
87 queryInCorpus cId t q = proc () -> do
88 (n, nn) <- joinInCorpus -< ()
89 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
91 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
92 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
93 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
94 restrict -< (n ^. ns_typename ) .== (pgInt4 $ toDBid NodeDocument)
95 returnA -< FacetDoc { facetDoc_id = n^.ns_id
96 , facetDoc_created = n^.ns_date
97 , facetDoc_title = n^.ns_name
98 , facetDoc_hyperdata = n^.ns_hyperdata
99 , facetDoc_category = nn^.nn_category
100 , facetDoc_ngramCount = nn^.nn_score
101 , facetDoc_score = nn^.nn_score }
103 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
104 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
106 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
107 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
109 ------------------------------------------------------------------------
110 searchInCorpusWithContacts
118 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
119 searchInCorpusWithContacts cId aId q o l _order =
120 runOpaQuery $ limit' l
122 $ orderBy ( desc _fp_score)
123 $ selectGroup cId aId
133 ( Column (Nullable PGInt4)
134 , Column (Nullable PGTimestamptz)
135 , Column (Nullable PGJsonb)
136 , Column (Nullable PGInt4)
138 selectContactViaDoc cId aId q = proc () -> do
139 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
140 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
141 restrict -< (doc^.ns_typename) .== (pgInt4 $ toDBid NodeDocument)
142 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
143 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
144 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ toDBid NodeContact)
145 returnA -< ( contact^.node_id
147 , contact^.node_hyperdata
148 , toNullable $ pgInt4 1
151 selectGroup :: HasDBid NodeType
155 -> Select FacetPairedReadNull
156 selectGroup cId aId q = proc () -> do
157 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
158 (selectContactViaDoc cId aId q) -< ()
159 returnA -< FacetPaired { _fp_id = a
165 queryContactViaDoc :: O.Query ( NodeSearchRead
186 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
187 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
189 cond23 :: ( NodeNodeRead
194 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
196 cond34 :: ( NodeNodeRead
203 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
206 cond45 :: ( NodeSearchRead
215 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
218 ------------------------------------------------------------------------
220 newtype TSQuery = UnsafeTSQuery [Text]
222 -- | TODO [""] -> panic "error"
223 toTSQuery :: [Text] -> TSQuery
224 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
227 instance IsString TSQuery
229 fromString = UnsafeTSQuery . words . cs
232 instance ToField TSQuery
234 toField (UnsafeTSQuery xs)
235 = Many $ intersperse (Plain " && ")
236 $ map (\q -> Many [ Plain "plainto_tsquery("
242 data Order = Asc | Desc
244 instance ToField Order
246 toField Asc = Plain "ASC"
247 toField Desc = Plain "DESC"
253 textSearchQuery :: Query
254 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
255 \ , n.hyperdata->'title' \
256 \ , n.hyperdata->'source' \
257 \ , n.hyperdata->'authors' \
258 \ , COALESCE(nn.score,null) \
260 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
262 \ n.search @@ (?::tsquery) \
263 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
264 \ AND n.typename = ? \
265 \ ORDER BY n.hyperdata -> 'publication_date' ? \
268 -- | Text Search Function for Master Corpus
269 -- TODO : text search for user corpus
271 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
272 -- textSearchTest pId q = textSearch q pId 5 0 Asc
273 textSearch :: HasDBid NodeType
274 => TSQuery -> ParentId
275 -> Limit -> Offset -> Order
276 -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
277 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
279 typeId = toDBid NodeDocument