2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
13 module Gargantext.Database.Action.Search where
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
18 import Data.List (intersperse)
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core.Types
26 import Gargantext.Database.Admin.Config (nodeTypeId)
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
29 import Gargantext.Database.Query.Facet
30 import Gargantext.Database.Query.Filter
31 import Gargantext.Database.Query.Join (leftJoin5)
32 import Gargantext.Database.Query.Table.Node
33 import Gargantext.Database.Query.Table.NodeNode
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Prelude
36 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
37 import Opaleye hiding (Query, Order)
38 import Data.Profunctor.Product (p4)
39 import qualified Opaleye as O hiding (Order)
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: ParentId
44 -> Cmd err [(NodeId, HyperdataDocument)]
45 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
47 -- | Global search query where ParentId is Master Node Corpus Id
48 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
49 queryDocInDatabase _ q = proc () -> do
50 row <- queryNodeSearchTable -< ()
51 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
52 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
53 returnA -< (_ns_id row, _ns_hyperdata row)
55 ------------------------------------------------------------------------
56 -- | todo add limit and offset and order
57 searchInCorpus :: CorpusId
64 searchInCorpus cId t q o l order = runOpaQuery
65 $ filterWith o l order
70 searchCountInCorpus :: CorpusId
74 searchCountInCorpus cId t q = runCountOpaQuery
79 queryInCorpus :: CorpusId
82 -> O.Query FacetDocRead
83 queryInCorpus cId t q = proc () -> do
84 (n, nn) <- joinInCorpus -< ()
85 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
87 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
88 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
89 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
90 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
91 returnA -< FacetDoc (n^.ns_id )
98 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
99 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
101 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
102 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
104 ------------------------------------------------------------------------
105 searchInCorpusWithContacts
112 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
113 searchInCorpusWithContacts cId aId q o l _order =
114 runOpaQuery $ limit' l
116 $ orderBy ( desc _fp_score)
117 $ selectGroup cId aId
126 ( Column (Nullable PGInt4)
127 , Column (Nullable PGTimestamptz)
128 , Column (Nullable PGJsonb)
129 , Column (Nullable PGInt4)
131 selectContactViaDoc cId aId q = proc () -> do
132 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
133 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
134 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
135 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
136 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
137 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
138 returnA -< ( contact^.node_id
140 , contact^.node_hyperdata
141 , toNullable $ pgInt4 1
144 selectGroup :: NodeId
147 -> Select FacetPairedReadNull
148 selectGroup cId aId q = proc () -> do
149 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
150 (selectContactViaDoc cId aId q) -< ()
151 returnA -< FacetPaired a b c d
154 queryContactViaDoc :: O.Query ( NodeSearchRead
175 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
176 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
178 cond23 :: ( NodeNodeRead
183 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
185 cond34 :: ( NodeNodeRead
192 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
195 cond45 :: ( NodeSearchRead
204 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
207 ------------------------------------------------------------------------
209 newtype TSQuery = UnsafeTSQuery [Text]
211 -- | TODO [""] -> panic "error"
212 toTSQuery :: [Text] -> TSQuery
213 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
216 instance IsString TSQuery
218 fromString = UnsafeTSQuery . words . cs
221 instance ToField TSQuery
223 toField (UnsafeTSQuery xs)
224 = Many $ intersperse (Plain " && ")
225 $ map (\q -> Many [ Plain "plainto_tsquery("
231 data Order = Asc | Desc
233 instance ToField Order
235 toField Asc = Plain "ASC"
236 toField Desc = Plain "DESC"
242 textSearchQuery :: Query
243 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
244 \ , n.hyperdata->'title' \
245 \ , n.hyperdata->'source' \
246 \ , n.hyperdata->'authors' \
247 \ , COALESCE(nn.score,null) \
249 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
251 \ n.search @@ (?::tsquery) \
252 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
253 \ AND n.typename = ? \
254 \ ORDER BY n.hyperdata -> 'publication_date' ? \
257 -- | Text Search Function for Master Corpus
258 -- TODO : text search for user corpus
260 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
261 -- textSearchTest pId q = textSearch q pId 5 0 Asc
262 textSearch :: TSQuery -> ParentId
263 -> Limit -> Offset -> Order
264 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
265 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
267 typeId = nodeTypeId NodeDocument