2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
16 module Gargantext.Database.TextSearch where
19 import Data.Map.Strict hiding (map, drop, take)
21 import Control.Lens ((^.))
22 import Data.List (intersperse, take, drop)
23 import Data.String (IsString(..))
24 import Data.Text (Text, words, unpack, intercalate)
25 import Data.Time (UTCTime)
26 import Database.PostgreSQL.Simple (Query)
27 import Database.PostgreSQL.Simple.ToField
28 import Gargantext.Database.Config (nodeTypeId)
29 import Gargantext.Database.Types.Node (NodeType(..))
30 import Gargantext.Prelude
31 --import Gargantext.Database.Node.Contact
32 import Gargantext.Database.Facet
33 import Gargantext.Database.Schema.Node
34 import Gargantext.Database.Schema.Ngrams
35 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
36 import Gargantext.Database.Schema.NodeNodeNgrams
37 import Gargantext.Database.Queries.Join (leftJoin6)
38 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery)
39 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
40 import Gargantext.Core.Types
41 import Control.Arrow (returnA)
42 import qualified Opaleye as O hiding (Order)
43 import Opaleye hiding (Query, Order)
46 ------------------------------------------------------------------------
47 searchInDatabase :: ParentId
49 -> Cmd err [(NodeId, HyperdataDocument)]
50 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
52 -- | Global search query where ParentId is Master Node Corpus Id
53 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
54 queryInDatabase _ q = proc () -> do
55 row <- queryNodeSearchTable -< ()
56 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
57 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
58 returnA -< (_ns_id row, _ns_hyperdata row)
60 ------------------------------------------------------------------------
61 -- | todo add limit and offset and order
62 searchInCorpus :: CorpusId
69 searchInCorpus cId t q o l order = runOpaQuery (filterWith o l order $ queryInCorpus cId t q')
71 q' = intercalate " | " $ map stemIt q
73 queryInCorpus :: CorpusId
76 -> O.Query FacetDocRead
77 queryInCorpus cId t q = proc () -> do
78 (n, nn) <- joinInCorpus -< ()
79 restrict -< ( nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
81 then ( nn^.nn_category) .== (toNullable $ pgInt4 0)
82 else ( nn^.nn_category) .>= (toNullable $ pgInt4 1)
83 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
84 restrict -< (n ^. ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
85 returnA -< FacetDoc (n^.ns_id )
92 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
93 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
95 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
96 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
98 ------------------------------------------------------------------------
99 type AuthorName = Text
101 -- | TODO Optim: Offset and Limit in the Query
102 -- TODO-SECURITY check
103 searchInCorpusWithContacts
110 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
111 searchInCorpusWithContacts cId lId q o l order =
112 take (maybe 10 identity l)
113 <$> drop (maybe 0 identity o)
114 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
115 <$> toList <$> fromListWith (<>)
116 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
117 , catMaybes [Pair <$> p1 <*> p2]
120 <$> searchInCorpusWithContacts' cId lId q o l order
122 -- TODO-SECURITY check
123 searchInCorpusWithContacts'
130 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
131 searchInCorpusWithContacts' cId lId q o l order =
132 runOpaQuery $ queryInCorpusWithContacts cId lId q' o l order
134 q' = intercalate " | " $ map stemIt q
137 queryInCorpusWithContacts
144 -> O.Query FacetPairedRead
145 queryInCorpusWithContacts cId lId q _ _ _ = proc () -> do
146 (docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
147 restrict -< (docs^.ns_search) @@ (pgTSQuery $ unpack q )
148 restrict -< (docs^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
149 restrict -< (docNgrams^.nnng_node2_id) .== (toNullable $ pgNodeId lId)
150 restrict -< (corpusDoc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
151 -- restrict -< (nng_listType docNgrams) .== (toNullable $ pgNgramsType Authors)
152 restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
153 -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
154 returnA -< FacetPaired (docs^.ns_id) (docs^.ns_date) (docs^.ns_hyperdata) (pgInt4 0) (contacts^.node_id, ngrams'^.ngrams_terms)
156 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
158 , ( NodeNodeNgramsReadNull
160 , ( NodeNodeNgramsReadNull
167 joinInCorpusWithContacts =
170 queryNodeNodeNgramsTable
172 queryNodeNodeNgramsTable
181 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
182 cond12 (ng3, n2) = n2^.node_id .== ng3^.nnng_node1_id
184 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
185 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
187 cond34 :: ( NodeNodeNgramsRead
189 , ( NodeNodeNgramsReadNull
194 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
196 cond45 :: ( NodeNodeRead
197 , ( NodeNodeNgramsRead
199 , ( NodeNodeNgramsReadNull
205 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
207 cond56 :: ( NodeSearchRead
209 , ( NodeNodeNgramsReadNull
211 , ( NodeNodeNgramsReadNull
218 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
221 newtype TSQuery = UnsafeTSQuery [Text]
223 -- | TODO [""] -> panic "error"
224 toTSQuery :: [Text] -> TSQuery
225 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
228 instance IsString TSQuery
230 fromString = UnsafeTSQuery . words . cs
233 instance ToField TSQuery
235 toField (UnsafeTSQuery xs)
236 = Many $ intersperse (Plain " && ")
237 $ map (\q -> Many [ Plain "plainto_tsquery("
243 data Order = Asc | Desc
245 instance ToField Order
247 toField Asc = Plain "ASC"
248 toField Desc = Plain "DESC"
254 textSearchQuery :: Query
255 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
256 \ , n.hyperdata->'title' \
257 \ , n.hyperdata->'source' \
258 \ , n.hyperdata->'authors' \
259 \ , COALESCE(nn.score,null) \
261 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
263 \ n.search @@ (?::tsquery) \
264 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
265 \ AND n.typename = ? \
266 \ ORDER BY n.hyperdata -> 'publication_date' ? \
269 -- | Text Search Function for Master Corpus
270 -- TODO : text search for user corpus
272 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
273 -- textSearchTest pId q = textSearch q pId 5 0 Asc
274 textSearch :: TSQuery -> ParentId
275 -> Limit -> Offset -> Order
276 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
277 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
279 typeId = nodeTypeId NodeDocument