2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
13 module Gargantext.Database.Action.Search where
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
18 import Data.List (intersperse)
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core.Types
26 import Gargantext.Database.Admin.Config (nodeTypeId)
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node (NodeType(..))
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
30 import Gargantext.Database.Query.Facet
31 import Gargantext.Database.Query.Filter
32 import Gargantext.Database.Query.Join (leftJoin5)
33 import Gargantext.Database.Query.Table.Node
34 import Gargantext.Database.Query.Table.NodeNode
35 import Gargantext.Database.Schema.Node
36 import Gargantext.Prelude
37 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
38 import Opaleye hiding (Query, Order)
39 import qualified Opaleye as O hiding (Order)
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: ParentId
44 -> Cmd err [(NodeId, HyperdataDocument)]
45 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
47 -- | Global search query where ParentId is Master Node Corpus Id
48 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
49 queryDocInDatabase _ q = proc () -> do
50 row <- queryNodeSearchTable -< ()
51 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
52 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
53 returnA -< (_ns_id row, _ns_hyperdata row)
55 ------------------------------------------------------------------------
56 -- | todo add limit and offset and order
57 searchInCorpus :: CorpusId
64 searchInCorpus cId t q o l order = runOpaQuery
65 $ filterWith o l order
70 searchCountInCorpus :: CorpusId
74 searchCountInCorpus cId t q = runCountOpaQuery
79 queryInCorpus :: CorpusId
82 -> O.Query FacetDocRead
83 queryInCorpus cId t q = proc () -> do
84 (n, nn) <- joinInCorpus -< ()
85 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
87 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
88 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
89 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
90 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
91 returnA -< FacetDoc (n^.ns_id )
98 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
99 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
101 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
102 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
104 ------------------------------------------------------------------------
105 searchInCorpusWithContacts
112 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
113 searchInCorpusWithContacts cId aId q o l _order =
114 runOpaQuery $ limit' l
116 -- $ orderBy ( o l order
117 $ selectContactViaDoc cId aId
126 -> O.Query FacetPairedReadNull
127 selectContactViaDoc cId aId q = proc () -> do
128 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
129 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
130 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
131 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
132 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
133 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
134 returnA -< FacetPaired (contact^.node_id)
136 (contact^.node_hyperdata)
137 (toNullable $ pgInt4 0)
141 queryContactViaDoc :: O.Query ( NodeSearchRead
162 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
163 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
165 cond23 :: ( NodeNodeRead
170 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
172 cond34 :: ( NodeNodeRead
179 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
182 cond45 :: ( NodeSearchRead
191 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
194 ------------------------------------------------------------------------
196 newtype TSQuery = UnsafeTSQuery [Text]
198 -- | TODO [""] -> panic "error"
199 toTSQuery :: [Text] -> TSQuery
200 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
203 instance IsString TSQuery
205 fromString = UnsafeTSQuery . words . cs
208 instance ToField TSQuery
210 toField (UnsafeTSQuery xs)
211 = Many $ intersperse (Plain " && ")
212 $ map (\q -> Many [ Plain "plainto_tsquery("
218 data Order = Asc | Desc
220 instance ToField Order
222 toField Asc = Plain "ASC"
223 toField Desc = Plain "DESC"
229 textSearchQuery :: Query
230 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
231 \ , n.hyperdata->'title' \
232 \ , n.hyperdata->'source' \
233 \ , n.hyperdata->'authors' \
234 \ , COALESCE(nn.score,null) \
236 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
238 \ n.search @@ (?::tsquery) \
239 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
240 \ AND n.typename = ? \
241 \ ORDER BY n.hyperdata -> 'publication_date' ? \
244 -- | Text Search Function for Master Corpus
245 -- TODO : text search for user corpus
247 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
248 -- textSearchTest pId q = textSearch q pId 5 0 Asc
249 textSearch :: TSQuery -> ParentId
250 -> Limit -> Offset -> Order
251 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
252 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
254 typeId = nodeTypeId NodeDocument