2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
17 module Gargantext.Database.Action.Search where
19 import Control.Arrow (returnA)
20 import Control.Lens ((^.))
22 import Data.List (intersperse, take, drop)
23 import Data.Map.Strict hiding (map, drop, take)
25 import Data.String (IsString(..))
26 import Data.Text (Text, words, unpack, intercalate)
27 import Data.Time (UTCTime)
28 import Database.PostgreSQL.Simple (Query)
29 import Database.PostgreSQL.Simple.ToField
30 import Opaleye hiding (Query, Order)
31 import qualified Opaleye as O hiding (Order)
33 import Gargantext.Core.Types
34 import Gargantext.Database.Admin.Config (nodeTypeId)
35 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
36 import Gargantext.Database.Admin.Types.Node (NodeType(..))
37 import Gargantext.Database.Query.Facet
38 import Gargantext.Database.Query.Join (leftJoin6)
39 import Gargantext.Database.Query.Table.Node
40 import Gargantext.Database.Query.Table.NodeNode
41 import Gargantext.Database.Query.Table.NodeNodeNgrams
42 import Gargantext.Database.Query.Table.Ngrams
43 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
44 import Gargantext.Database.Schema.Node
45 import Gargantext.Prelude
46 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
48 ------------------------------------------------------------------------
49 searchInDatabase :: ParentId
51 -> Cmd err [(NodeId, HyperdataDocument)]
52 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
54 -- | Global search query where ParentId is Master Node Corpus Id
55 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
56 queryInDatabase _ q = proc () -> do
57 row <- queryNodeSearchTable -< ()
58 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
59 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
60 returnA -< (_ns_id row, _ns_hyperdata row)
62 ------------------------------------------------------------------------
63 -- | todo add limit and offset and order
64 searchInCorpus :: CorpusId
71 searchInCorpus cId t q o l order = runOpaQuery
72 $ filterWith o l order
77 searchCountInCorpus :: CorpusId
81 searchCountInCorpus cId t q = runCountOpaQuery
86 queryInCorpus :: CorpusId
89 -> O.Query FacetDocRead
90 queryInCorpus cId t q = proc () -> do
91 (n, nn) <- joinInCorpus -< ()
92 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
94 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
95 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
96 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
97 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
98 returnA -< FacetDoc (n^.ns_id )
105 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
106 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
108 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
109 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
111 ------------------------------------------------------------------------
112 type AuthorName = Text
114 -- | TODO Optim: Offset and Limit in the Query
115 -- TODO-SECURITY check
116 searchInCorpusWithContacts
123 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
124 searchInCorpusWithContacts cId lId q o l order =
125 take (maybe 10 identity l)
126 <$> drop (maybe 0 identity o)
127 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
128 <$> toList <$> fromListWith (<>)
129 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
130 , catMaybes [Pair <$> p1 <*> p2]
133 <$> searchInCorpusWithContacts' cId lId q o l order
135 -- TODO-SECURITY check
136 searchInCorpusWithContacts'
143 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
144 searchInCorpusWithContacts' cId lId q o l order =
145 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
150 queryInCorpusWithContacts
157 -> O.Query FacetPairedRead
158 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
159 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
160 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
161 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
162 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
163 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
164 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
165 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
166 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
167 returnA -< FacetPaired (n^.ns_id)
171 (contacts^.node_id, ngrams'^.ngrams_terms)
173 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
175 , ( NodeNodeNgramsReadNull
177 , ( NodeNodeNgramsReadNull
184 joinInCorpusWithContacts =
187 queryNodeNodeNgramsTable
189 queryNodeNodeNgramsTable
198 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
199 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
201 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
202 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
204 cond34 :: ( NodeNodeNgramsRead
206 , ( NodeNodeNgramsReadNull
211 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
213 cond45 :: ( NodeNodeRead
214 , ( NodeNodeNgramsRead
216 , ( NodeNodeNgramsReadNull
222 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
224 cond56 :: ( NodeSearchRead
226 , ( NodeNodeNgramsReadNull
228 , ( NodeNodeNgramsReadNull
235 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
238 newtype TSQuery = UnsafeTSQuery [Text]
240 -- | TODO [""] -> panic "error"
241 toTSQuery :: [Text] -> TSQuery
242 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
245 instance IsString TSQuery
247 fromString = UnsafeTSQuery . words . cs
250 instance ToField TSQuery
252 toField (UnsafeTSQuery xs)
253 = Many $ intersperse (Plain " && ")
254 $ map (\q -> Many [ Plain "plainto_tsquery("
260 data Order = Asc | Desc
262 instance ToField Order
264 toField Asc = Plain "ASC"
265 toField Desc = Plain "DESC"
271 textSearchQuery :: Query
272 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
273 \ , n.hyperdata->'title' \
274 \ , n.hyperdata->'source' \
275 \ , n.hyperdata->'authors' \
276 \ , COALESCE(nn.score,null) \
278 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
280 \ n.search @@ (?::tsquery) \
281 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
282 \ AND n.typename = ? \
283 \ ORDER BY n.hyperdata -> 'publication_date' ? \
286 -- | Text Search Function for Master Corpus
287 -- TODO : text search for user corpus
289 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
290 -- textSearchTest pId q = textSearch q pId 5 0 Asc
291 textSearch :: TSQuery -> ParentId
292 -> Limit -> Offset -> Order
293 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
294 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
296 typeId = nodeTypeId NodeDocument