2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
13 module Gargantext.Database.Action.Search where
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
18 import Data.List (intersperse, take, drop)
19 import Data.Map.Strict hiding (map, drop, take)
21 import Data.String (IsString(..))
22 import Data.Text (Text, words, unpack, intercalate)
23 import Data.Time (UTCTime)
24 import Database.PostgreSQL.Simple (Query)
25 import Database.PostgreSQL.Simple.ToField
26 import Opaleye hiding (Query, Order)
27 import qualified Opaleye as O hiding (Order)
29 import Gargantext.Core.Types
30 import Gargantext.Database.Admin.Config (nodeTypeId)
31 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
32 import Gargantext.Database.Admin.Types.Node (NodeType(..))
33 import Gargantext.Database.Query.Facet
34 import Gargantext.Database.Query.Join (leftJoin6)
35 import Gargantext.Database.Query.Table.Node
36 import Gargantext.Database.Query.Table.NodeNode
37 import Gargantext.Database.Query.Table.NodeNodeNgrams
38 import Gargantext.Database.Query.Table.Ngrams
39 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
40 import Gargantext.Database.Schema.Node
41 import Gargantext.Prelude
42 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
44 ------------------------------------------------------------------------
45 searchInDatabase :: ParentId
47 -> Cmd err [(NodeId, HyperdataDocument)]
48 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
50 -- | Global search query where ParentId is Master Node Corpus Id
51 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
52 queryInDatabase _ q = proc () -> do
53 row <- queryNodeSearchTable -< ()
54 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
55 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
56 returnA -< (_ns_id row, _ns_hyperdata row)
58 ------------------------------------------------------------------------
59 -- | todo add limit and offset and order
60 searchInCorpus :: CorpusId
67 searchInCorpus cId t q o l order = runOpaQuery
68 $ filterWith o l order
73 searchCountInCorpus :: CorpusId
77 searchCountInCorpus cId t q = runCountOpaQuery
82 queryInCorpus :: CorpusId
85 -> O.Query FacetDocRead
86 queryInCorpus cId t q = proc () -> do
87 (n, nn) <- joinInCorpus -< ()
88 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
90 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
91 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
92 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
93 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
94 returnA -< FacetDoc (n^.ns_id )
101 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
102 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
104 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
105 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
107 ------------------------------------------------------------------------
108 type AuthorName = Text
110 -- | TODO Optim: Offset and Limit in the Query
111 -- TODO-SECURITY check
112 searchInCorpusWithContacts
119 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
120 searchInCorpusWithContacts cId lId q o l order =
121 take (maybe 10 identity l)
122 <$> drop (maybe 0 identity o)
123 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
124 <$> toList <$> fromListWith (<>)
125 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
126 , catMaybes [Pair <$> p1 <*> p2]
129 <$> searchInCorpusWithContacts' cId lId q o l order
131 -- TODO-SECURITY check
132 searchInCorpusWithContacts'
139 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
140 searchInCorpusWithContacts' cId lId q o l order =
141 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
146 queryInCorpusWithContacts
153 -> O.Query FacetPairedRead
154 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
155 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
156 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
157 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
158 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
159 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
160 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
161 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
162 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
163 returnA -< FacetPaired (n^.ns_id)
167 (contacts^.node_id, ngrams'^.ngrams_terms)
169 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
171 , ( NodeNodeNgramsReadNull
173 , ( NodeNodeNgramsReadNull
180 joinInCorpusWithContacts =
183 queryNodeNodeNgramsTable
185 queryNodeNodeNgramsTable
194 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
195 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
197 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
198 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
200 cond34 :: ( NodeNodeNgramsRead
202 , ( NodeNodeNgramsReadNull
207 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
209 cond45 :: ( NodeNodeRead
210 , ( NodeNodeNgramsRead
212 , ( NodeNodeNgramsReadNull
218 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
220 cond56 :: ( NodeSearchRead
222 , ( NodeNodeNgramsReadNull
224 , ( NodeNodeNgramsReadNull
231 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
234 newtype TSQuery = UnsafeTSQuery [Text]
236 -- | TODO [""] -> panic "error"
237 toTSQuery :: [Text] -> TSQuery
238 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
241 instance IsString TSQuery
243 fromString = UnsafeTSQuery . words . cs
246 instance ToField TSQuery
248 toField (UnsafeTSQuery xs)
249 = Many $ intersperse (Plain " && ")
250 $ map (\q -> Many [ Plain "plainto_tsquery("
256 data Order = Asc | Desc
258 instance ToField Order
260 toField Asc = Plain "ASC"
261 toField Desc = Plain "DESC"
267 textSearchQuery :: Query
268 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
269 \ , n.hyperdata->'title' \
270 \ , n.hyperdata->'source' \
271 \ , n.hyperdata->'authors' \
272 \ , COALESCE(nn.score,null) \
274 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
276 \ n.search @@ (?::tsquery) \
277 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
278 \ AND n.typename = ? \
279 \ ORDER BY n.hyperdata -> 'publication_date' ? \
282 -- | Text Search Function for Master Corpus
283 -- TODO : text search for user corpus
285 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
286 -- textSearchTest pId q = textSearch q pId 5 0 Asc
287 textSearch :: TSQuery -> ParentId
288 -> Limit -> Offset -> Order
289 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
290 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
292 typeId = nodeTypeId NodeDocument