2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
16 module Gargantext.Database.TextSearch where
19 import Data.Map.Strict hiding (map, drop, take)
21 import Control.Lens ((^.))
22 import Data.List (intersperse, take, drop)
23 import Data.String (IsString(..))
24 import Data.Text (Text, words, unpack, intercalate)
25 import Data.Time (UTCTime)
26 import Database.PostgreSQL.Simple (Query)
27 import Database.PostgreSQL.Simple.ToField
28 import Gargantext.Database.Config (nodeTypeId)
29 import Gargantext.Database.Types.Node (NodeType(..))
30 import Gargantext.Prelude
31 --import Gargantext.Database.Node.Contact
32 import Gargantext.Database.Facet
33 import Gargantext.Database.Schema.Node
34 import Gargantext.Database.Schema.Ngrams
35 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
36 import Gargantext.Database.Schema.NodeNodeNgrams
37 import Gargantext.Database.Queries.Join (leftJoin6)
38 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
39 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
40 import Gargantext.Core.Types
41 import Control.Arrow (returnA)
42 import qualified Opaleye as O hiding (Order)
43 import Opaleye hiding (Query, Order)
46 ------------------------------------------------------------------------
47 searchInDatabase :: ParentId
49 -> Cmd err [(NodeId, HyperdataDocument)]
50 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
52 -- | Global search query where ParentId is Master Node Corpus Id
53 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
54 queryInDatabase _ q = proc () -> do
55 row <- queryNodeSearchTable -< ()
56 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
57 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
58 returnA -< (_ns_id row, _ns_hyperdata row)
60 ------------------------------------------------------------------------
61 -- | todo add limit and offset and order
62 searchInCorpus :: CorpusId
69 searchInCorpus cId t q o l order = runOpaQuery
70 $ filterWith o l order
75 searchCountInCorpus :: CorpusId
79 searchCountInCorpus cId t q = runCountOpaQuery
84 queryInCorpus :: CorpusId
87 -> O.Query FacetDocRead
88 queryInCorpus cId t q = proc () -> do
89 (n, nn) <- joinInCorpus -< ()
90 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
92 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
93 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
94 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
95 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
96 returnA -< FacetDoc (n^.ns_id )
103 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
104 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
106 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
107 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
109 ------------------------------------------------------------------------
110 type AuthorName = Text
112 -- | TODO Optim: Offset and Limit in the Query
113 -- TODO-SECURITY check
114 searchInCorpusWithContacts
121 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
122 searchInCorpusWithContacts cId lId q o l order =
123 take (maybe 10 identity l)
124 <$> drop (maybe 0 identity o)
125 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
126 <$> toList <$> fromListWith (<>)
127 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
128 , catMaybes [Pair <$> p1 <*> p2]
131 <$> searchInCorpusWithContacts' cId lId q o l order
133 -- TODO-SECURITY check
134 searchInCorpusWithContacts'
141 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
142 searchInCorpusWithContacts' cId lId q o l order =
143 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
148 queryInCorpusWithContacts
155 -> O.Query FacetPairedRead
156 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
157 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
158 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
159 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
160 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
161 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
162 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
163 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
164 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
165 returnA -< FacetPaired (n^.ns_id)
169 (contacts^.node_id, ngrams'^.ngrams_terms)
171 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
173 , ( NodeNodeNgramsReadNull
175 , ( NodeNodeNgramsReadNull
182 joinInCorpusWithContacts =
185 queryNodeNodeNgramsTable
187 queryNodeNodeNgramsTable
196 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
197 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
199 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
200 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
202 cond34 :: ( NodeNodeNgramsRead
204 , ( NodeNodeNgramsReadNull
209 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
211 cond45 :: ( NodeNodeRead
212 , ( NodeNodeNgramsRead
214 , ( NodeNodeNgramsReadNull
220 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
222 cond56 :: ( NodeSearchRead
224 , ( NodeNodeNgramsReadNull
226 , ( NodeNodeNgramsReadNull
233 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
236 newtype TSQuery = UnsafeTSQuery [Text]
238 -- | TODO [""] -> panic "error"
239 toTSQuery :: [Text] -> TSQuery
240 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
243 instance IsString TSQuery
245 fromString = UnsafeTSQuery . words . cs
248 instance ToField TSQuery
250 toField (UnsafeTSQuery xs)
251 = Many $ intersperse (Plain " && ")
252 $ map (\q -> Many [ Plain "plainto_tsquery("
258 data Order = Asc | Desc
260 instance ToField Order
262 toField Asc = Plain "ASC"
263 toField Desc = Plain "DESC"
269 textSearchQuery :: Query
270 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
271 \ , n.hyperdata->'title' \
272 \ , n.hyperdata->'source' \
273 \ , n.hyperdata->'authors' \
274 \ , COALESCE(nn.score,null) \
276 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
278 \ n.search @@ (?::tsquery) \
279 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
280 \ AND n.typename = ? \
281 \ ORDER BY n.hyperdata -> 'publication_date' ? \
284 -- | Text Search Function for Master Corpus
285 -- TODO : text search for user corpus
287 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
288 -- textSearchTest pId q = textSearch q pId 5 0 Asc
289 textSearch :: TSQuery -> ParentId
290 -> Limit -> Offset -> Order
291 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
292 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
294 typeId = nodeTypeId NodeDocument