2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE RankNTypes #-}
17 module Gargantext.Database.TextSearch where
20 import Data.Map.Strict hiding (map, drop, take)
22 import Control.Lens ((^.))
23 import Data.List (intersperse, take, drop)
24 import Data.String (IsString(..))
25 import Data.Text (Text, words, unpack, intercalate)
26 import Data.Time (UTCTime)
27 import Database.PostgreSQL.Simple (Query)
28 import Database.PostgreSQL.Simple.ToField
29 import Gargantext.Database.Config (nodeTypeId)
30 import Gargantext.Database.Types.Node (NodeType(..))
31 import Gargantext.Prelude
32 --import Gargantext.Database.Node.Contact
33 import Gargantext.Database.Facet
34 import Gargantext.Database.Schema.Node
35 import Gargantext.Database.Schema.Ngrams
36 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
37 import Gargantext.Database.Schema.NodeNodeNgrams
38 import Gargantext.Database.Queries.Join (leftJoin6)
39 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
40 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
41 import Gargantext.Core.Types
42 import Control.Arrow (returnA)
43 import qualified Opaleye as O hiding (Order)
44 import Opaleye hiding (Query, Order)
47 ------------------------------------------------------------------------
48 searchInDatabase :: ParentId
50 -> Cmd err [(NodeId, HyperdataDocument)]
51 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
53 -- | Global search query where ParentId is Master Node Corpus Id
54 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
55 queryInDatabase _ q = proc () -> do
56 row <- queryNodeSearchTable -< ()
57 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
58 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
59 returnA -< (_ns_id row, _ns_hyperdata row)
61 ------------------------------------------------------------------------
62 -- | todo add limit and offset and order
63 searchInCorpus :: CorpusId
70 searchInCorpus cId t q o l order = runOpaQuery
71 $ filterWith o l order
76 searchCountInCorpus :: CorpusId
80 searchCountInCorpus cId t q = runCountOpaQuery
85 queryInCorpus :: CorpusId
88 -> O.Query FacetDocRead
89 queryInCorpus cId t q = proc () -> do
90 (n, nn) <- joinInCorpus -< ()
91 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
93 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
94 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
95 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
96 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
97 returnA -< FacetDoc (n^.ns_id )
104 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
105 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
107 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
108 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
110 ------------------------------------------------------------------------
111 type AuthorName = Text
113 -- | TODO Optim: Offset and Limit in the Query
114 -- TODO-SECURITY check
115 searchInCorpusWithContacts
122 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
123 searchInCorpusWithContacts cId lId q o l order =
124 take (maybe 10 identity l)
125 <$> drop (maybe 0 identity o)
126 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
127 <$> toList <$> fromListWith (<>)
128 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
129 , catMaybes [Pair <$> p1 <*> p2]
132 <$> searchInCorpusWithContacts' cId lId q o l order
134 -- TODO-SECURITY check
135 searchInCorpusWithContacts'
142 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
143 searchInCorpusWithContacts' cId lId q o l order =
144 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
149 queryInCorpusWithContacts
156 -> O.Query FacetPairedRead
157 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
158 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
159 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
160 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
161 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
162 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
163 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
164 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
165 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
166 returnA -< FacetPaired (n^.ns_id)
170 (contacts^.node_id, ngrams'^.ngrams_terms)
172 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
174 , ( NodeNodeNgramsReadNull
176 , ( NodeNodeNgramsReadNull
183 joinInCorpusWithContacts =
186 queryNodeNodeNgramsTable
188 queryNodeNodeNgramsTable
197 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
198 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
200 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
201 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
203 cond34 :: ( NodeNodeNgramsRead
205 , ( NodeNodeNgramsReadNull
210 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
212 cond45 :: ( NodeNodeRead
213 , ( NodeNodeNgramsRead
215 , ( NodeNodeNgramsReadNull
221 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
223 cond56 :: ( NodeSearchRead
225 , ( NodeNodeNgramsReadNull
227 , ( NodeNodeNgramsReadNull
234 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
237 newtype TSQuery = UnsafeTSQuery [Text]
239 -- | TODO [""] -> panic "error"
240 toTSQuery :: [Text] -> TSQuery
241 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
244 instance IsString TSQuery
246 fromString = UnsafeTSQuery . words . cs
249 instance ToField TSQuery
251 toField (UnsafeTSQuery xs)
252 = Many $ intersperse (Plain " && ")
253 $ map (\q -> Many [ Plain "plainto_tsquery("
259 data Order = Asc | Desc
261 instance ToField Order
263 toField Asc = Plain "ASC"
264 toField Desc = Plain "DESC"
270 textSearchQuery :: Query
271 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
272 \ , n.hyperdata->'title' \
273 \ , n.hyperdata->'source' \
274 \ , n.hyperdata->'authors' \
275 \ , COALESCE(nn.score,null) \
277 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
279 \ n.search @@ (?::tsquery) \
280 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
281 \ AND n.typename = ? \
282 \ ORDER BY n.hyperdata -> 'publication_date' ? \
285 -- | Text Search Function for Master Corpus
286 -- TODO : text search for user corpus
288 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
289 -- textSearchTest pId q = textSearch q pId 5 0 Asc
290 textSearch :: TSQuery -> ParentId
291 -> Limit -> Offset -> Order
292 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
293 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
295 typeId = nodeTypeId NodeDocument