2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
13 module Gargantext.Database.Action.Search where
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
18 import Data.List (intersperse)
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core.Types
26 import Gargantext.Database.Admin.Config (nodeTypeId)
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node (NodeType(..))
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
30 import Gargantext.Database.Query.Facet
31 import Gargantext.Database.Query.Filter
32 import Gargantext.Database.Query.Join (leftJoin5)
33 import Gargantext.Database.Query.Table.Node
34 import Gargantext.Database.Query.Table.NodeNode
35 import Gargantext.Database.Schema.Node
36 import Gargantext.Prelude
37 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
38 import Opaleye hiding (Query, Order)
39 import Data.Profunctor.Product (p4)
40 import qualified Opaleye as O hiding (Order)
42 ------------------------------------------------------------------------
43 searchDocInDatabase :: ParentId
45 -> Cmd err [(NodeId, HyperdataDocument)]
46 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
48 -- | Global search query where ParentId is Master Node Corpus Id
49 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
50 queryDocInDatabase _ q = proc () -> do
51 row <- queryNodeSearchTable -< ()
52 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
53 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
54 returnA -< (_ns_id row, _ns_hyperdata row)
56 ------------------------------------------------------------------------
57 -- | todo add limit and offset and order
58 searchInCorpus :: CorpusId
65 searchInCorpus cId t q o l order = runOpaQuery
66 $ filterWith o l order
71 searchCountInCorpus :: CorpusId
75 searchCountInCorpus cId t q = runCountOpaQuery
80 queryInCorpus :: CorpusId
83 -> O.Query FacetDocRead
84 queryInCorpus cId t q = proc () -> do
85 (n, nn) <- joinInCorpus -< ()
86 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
88 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
89 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
90 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
91 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
92 returnA -< FacetDoc (n^.ns_id )
99 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
100 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
102 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
103 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
105 ------------------------------------------------------------------------
106 searchInCorpusWithContacts
113 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
114 searchInCorpusWithContacts cId aId q o l _order =
115 runOpaQuery $ limit' l
117 $ orderBy ( desc _fp_score)
127 -> Select FacetPairedReadNull
128 selectContactViaDoc cId aId q = proc () -> do
129 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
130 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
131 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
132 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
133 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
134 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
135 returnA -< FacetPaired (contact^.node_id)
137 (contact^.node_hyperdata)
138 (toNullable $ pgInt4 1)
146 ( Column (Nullable PGInt4)
147 , Column (Nullable PGTimestamptz)
148 , Column (Nullable PGJsonb)
149 , Column (Nullable PGInt4)
151 selectContactViaDoc' cId aId q = proc () -> do
152 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
153 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
154 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
155 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
156 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
157 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
158 returnA -< ( contact^.node_id
160 , contact^.node_hyperdata
161 , toNullable $ pgInt4 1
167 -> Select FacetPairedReadNull
168 group cId aId q = proc () -> do
169 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
170 (selectContactViaDoc' cId aId q) -< ()
171 returnA -< FacetPaired a b c d
180 queryContactViaDoc :: O.Query ( NodeSearchRead
201 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
202 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
204 cond23 :: ( NodeNodeRead
209 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
211 cond34 :: ( NodeNodeRead
218 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
221 cond45 :: ( NodeSearchRead
230 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
233 ------------------------------------------------------------------------
235 newtype TSQuery = UnsafeTSQuery [Text]
237 -- | TODO [""] -> panic "error"
238 toTSQuery :: [Text] -> TSQuery
239 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
242 instance IsString TSQuery
244 fromString = UnsafeTSQuery . words . cs
247 instance ToField TSQuery
249 toField (UnsafeTSQuery xs)
250 = Many $ intersperse (Plain " && ")
251 $ map (\q -> Many [ Plain "plainto_tsquery("
257 data Order = Asc | Desc
259 instance ToField Order
261 toField Asc = Plain "ASC"
262 toField Desc = Plain "DESC"
268 textSearchQuery :: Query
269 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
270 \ , n.hyperdata->'title' \
271 \ , n.hyperdata->'source' \
272 \ , n.hyperdata->'authors' \
273 \ , COALESCE(nn.score,null) \
275 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
277 \ n.search @@ (?::tsquery) \
278 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
279 \ AND n.typename = ? \
280 \ ORDER BY n.hyperdata -> 'publication_date' ? \
283 -- | Text Search Function for Master Corpus
284 -- TODO : text search for user corpus
286 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
287 -- textSearchTest pId q = textSearch q pId 5 0 Asc
288 textSearch :: TSQuery -> ParentId
289 -> Limit -> Offset -> Order
290 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
291 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
293 typeId = nodeTypeId NodeDocument