2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
13 module Gargantext.Database.Action.Search where
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
18 import Data.List (intersperse)
20 import Data.String (IsString(..))
21 import Data.Text (Text, words, unpack, intercalate)
22 import Data.Time (UTCTime)
23 import Database.PostgreSQL.Simple (Query)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Core.Types
26 import Gargantext.Database.Admin.Config (nodeTypeId)
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
28 import Gargantext.Database.Admin.Types.Node (NodeType(..))
29 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
30 import Gargantext.Database.Query.Facet
31 import Gargantext.Database.Query.Filter
32 import Gargantext.Database.Query.Join (leftJoin5)
33 import Gargantext.Database.Query.Table.Node
34 import Gargantext.Database.Query.Table.NodeNode
35 import Gargantext.Database.Schema.Node
36 import Gargantext.Prelude
37 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
38 import Opaleye hiding (Query, Order)
39 import qualified Opaleye as O hiding (Order)
41 ------------------------------------------------------------------------
42 searchDocInDatabase :: ParentId
44 -> Cmd err [(NodeId, HyperdataDocument)]
45 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
47 -- | Global search query where ParentId is Master Node Corpus Id
48 queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
49 queryDocInDatabase _ q = proc () -> do
50 row <- queryNodeSearchTable -< ()
51 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
52 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
53 returnA -< (_ns_id row, _ns_hyperdata row)
55 ------------------------------------------------------------------------
56 -- | todo add limit and offset and order
57 searchInCorpus :: CorpusId
64 searchInCorpus cId t q o l order = runOpaQuery
65 $ filterWith o l order
70 searchCountInCorpus :: CorpusId
74 searchCountInCorpus cId t q = runCountOpaQuery
79 queryInCorpus :: CorpusId
82 -> O.Query FacetDocRead
83 queryInCorpus cId t q = proc () -> do
84 (n, nn) <- joinInCorpus -< ()
85 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
87 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
88 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
89 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
90 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
91 returnA -< FacetDoc (n^.ns_id )
98 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
99 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
101 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
102 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
104 ------------------------------------------------------------------------
105 searchInCorpusWithContacts
112 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
113 searchInCorpusWithContacts cId aId q o l _order =
114 runOpaQuery $ limit' l
116 -- $ orderBy ( o l order
117 $ selectContactViaDoc cId aId
126 -> O.Query FacetPairedReadNull
127 selectContactViaDoc cId aId q = proc () -> do
128 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
129 restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
130 restrict -< (doc^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
131 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
132 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
133 restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
134 returnA -< FacetPaired (contact^.node_id)
136 (contact^.node_hyperdata)
137 (toNullable $ pgInt4 0)
139 queryContactViaDoc :: O.Query ( NodeSearchRead
160 cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
161 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
163 cond23 :: ( NodeNodeRead
168 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
170 cond34 :: ( NodeNodeRead
177 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
180 cond45 :: ( NodeSearchRead
189 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
192 ------------------------------------------------------------------------
194 newtype TSQuery = UnsafeTSQuery [Text]
196 -- | TODO [""] -> panic "error"
197 toTSQuery :: [Text] -> TSQuery
198 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
201 instance IsString TSQuery
203 fromString = UnsafeTSQuery . words . cs
206 instance ToField TSQuery
208 toField (UnsafeTSQuery xs)
209 = Many $ intersperse (Plain " && ")
210 $ map (\q -> Many [ Plain "plainto_tsquery("
216 data Order = Asc | Desc
218 instance ToField Order
220 toField Asc = Plain "ASC"
221 toField Desc = Plain "DESC"
227 textSearchQuery :: Query
228 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
229 \ , n.hyperdata->'title' \
230 \ , n.hyperdata->'source' \
231 \ , n.hyperdata->'authors' \
232 \ , COALESCE(nn.score,null) \
234 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
236 \ n.search @@ (?::tsquery) \
237 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
238 \ AND n.typename = ? \
239 \ ORDER BY n.hyperdata -> 'publication_date' ? \
242 -- | Text Search Function for Master Corpus
243 -- TODO : text search for user corpus
245 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
246 -- textSearchTest pId q = textSearch q pId 5 0 Asc
247 textSearch :: TSQuery -> ParentId
248 -> Limit -> Offset -> Order
249 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
250 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
252 typeId = nodeTypeId NodeDocument