2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
16 module Gargantext.Database.TextSearch where
19 import Data.Map.Strict hiding (map, drop, take)
21 import Control.Lens ((^.))
22 import Data.List (intersperse, take, drop)
23 import Data.String (IsString(..))
24 import Data.Text (Text, words, unpack, intercalate)
25 import Data.Time (UTCTime)
26 import Database.PostgreSQL.Simple (Query)
27 import Database.PostgreSQL.Simple.ToField
28 import Gargantext.Database.Config (nodeTypeId)
29 import Gargantext.Database.Types.Node (NodeType(..))
30 import Gargantext.Prelude
31 --import Gargantext.Database.Node.Contact
32 import Gargantext.Database.Facet
33 import Gargantext.Database.Schema.Node
34 import Gargantext.Database.Schema.Ngrams
35 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
36 import Gargantext.Database.Schema.NodeNodeNgrams
37 import Gargantext.Database.Queries.Join (leftJoin6)
38 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery)
39 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
40 import Gargantext.Core.Types
41 import Control.Arrow (returnA)
42 import qualified Opaleye as O hiding (Order)
43 import Opaleye hiding (Query, Order)
46 ------------------------------------------------------------------------
47 searchInDatabase :: ParentId
49 -> Cmd err [(NodeId, HyperdataDocument)]
50 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
52 -- | Global search query where ParentId is Master Node Corpus Id
53 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
54 queryInDatabase _ q = proc () -> do
55 row <- queryNodeSearchTable -< ()
56 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
57 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
58 returnA -< (_ns_id row, _ns_hyperdata row)
60 ------------------------------------------------------------------------
61 -- | todo add limit and offset and order
62 searchInCorpus :: CorpusId
69 searchInCorpus cId t q o l order = runOpaQuery
70 $ filterWith o l order
75 queryInCorpus :: CorpusId
78 -> O.Query FacetDocRead
79 queryInCorpus cId t q = proc () -> do
80 (n, nn) <- joinInCorpus -< ()
81 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
83 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
84 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
85 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
86 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
87 returnA -< FacetDoc (n^.ns_id )
94 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
95 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
97 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
98 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
100 ------------------------------------------------------------------------
101 type AuthorName = Text
103 -- | TODO Optim: Offset and Limit in the Query
104 -- TODO-SECURITY check
105 searchInCorpusWithContacts
112 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
113 searchInCorpusWithContacts cId lId q o l order =
114 take (maybe 10 identity l)
115 <$> drop (maybe 0 identity o)
116 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
117 <$> toList <$> fromListWith (<>)
118 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
119 , catMaybes [Pair <$> p1 <*> p2]
122 <$> searchInCorpusWithContacts' cId lId q o l order
124 -- TODO-SECURITY check
125 searchInCorpusWithContacts'
132 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
133 searchInCorpusWithContacts' cId lId q o l order =
134 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
139 queryInCorpusWithContacts
146 -> O.Query FacetPairedRead
147 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
148 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
149 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
150 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
151 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
152 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
153 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
154 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
155 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
156 returnA -< FacetPaired (n^.ns_id)
160 (contacts^.node_id, ngrams'^.ngrams_terms)
162 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
164 , ( NodeNodeNgramsReadNull
166 , ( NodeNodeNgramsReadNull
173 joinInCorpusWithContacts =
176 queryNodeNodeNgramsTable
178 queryNodeNodeNgramsTable
187 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
188 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
190 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
191 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
193 cond34 :: ( NodeNodeNgramsRead
195 , ( NodeNodeNgramsReadNull
200 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
202 cond45 :: ( NodeNodeRead
203 , ( NodeNodeNgramsRead
205 , ( NodeNodeNgramsReadNull
211 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
213 cond56 :: ( NodeSearchRead
215 , ( NodeNodeNgramsReadNull
217 , ( NodeNodeNgramsReadNull
224 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
227 newtype TSQuery = UnsafeTSQuery [Text]
229 -- | TODO [""] -> panic "error"
230 toTSQuery :: [Text] -> TSQuery
231 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
234 instance IsString TSQuery
236 fromString = UnsafeTSQuery . words . cs
239 instance ToField TSQuery
241 toField (UnsafeTSQuery xs)
242 = Many $ intersperse (Plain " && ")
243 $ map (\q -> Many [ Plain "plainto_tsquery("
249 data Order = Asc | Desc
251 instance ToField Order
253 toField Asc = Plain "ASC"
254 toField Desc = Plain "DESC"
260 textSearchQuery :: Query
261 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
262 \ , n.hyperdata->'title' \
263 \ , n.hyperdata->'source' \
264 \ , n.hyperdata->'authors' \
265 \ , COALESCE(nn.score,null) \
267 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
269 \ n.search @@ (?::tsquery) \
270 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
271 \ AND n.typename = ? \
272 \ ORDER BY n.hyperdata -> 'publication_date' ? \
275 -- | Text Search Function for Master Corpus
276 -- TODO : text search for user corpus
278 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
279 -- textSearchTest pId q = textSearch q pId 5 0 Asc
280 textSearch :: TSQuery -> ParentId
281 -> Limit -> Offset -> Order
282 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
283 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
285 typeId = nodeTypeId NodeDocument