2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE Arrows #-}
13 module Gargantext.Database.Action.Search where
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
18 import Data.List (intersperse, take, drop)
19 import Data.Map.Strict hiding (map, drop, take)
21 import Data.String (IsString(..))
22 import Data.Text (Text, words, unpack, intercalate)
23 import Data.Time (UTCTime)
24 import Database.PostgreSQL.Simple (Query)
25 import Database.PostgreSQL.Simple.ToField
26 import Gargantext.Core.Types
27 import Gargantext.Database.Query.Facet
28 import Gargantext.Database.Query.Join (leftJoin6)
29 import Gargantext.Database.Query.Table.Node
30 import Gargantext.Database.Query.Table.NodeNode
31 import Gargantext.Database.Query.Table.NodeNodeNgrams
32 import Gargantext.Database.Query.Table.Ngrams
33 import Gargantext.Database.Admin.Config (nodeTypeId)
34 import Gargantext.Database.Admin.Types.Node (NodeType(..))
35 import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
36 import Gargantext.Database.Schema.Node
37 import Gargantext.Prelude
38 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
39 import Opaleye hiding (Query, Order)
40 import qualified Opaleye as O hiding (Order)
42 ------------------------------------------------------------------------
43 searchInDatabase :: ParentId
45 -> Cmd err [(NodeId, HyperdataDocument)]
46 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
48 -- | Global search query where ParentId is Master Node Corpus Id
49 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
50 queryInDatabase _ q = proc () -> do
51 row <- queryNodeSearchTable -< ()
52 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
53 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
54 returnA -< (_ns_id row, _ns_hyperdata row)
56 ------------------------------------------------------------------------
57 -- | todo add limit and offset and order
58 searchInCorpus :: CorpusId
65 searchInCorpus cId t q o l order = runOpaQuery
66 $ filterWith o l order
71 searchCountInCorpus :: CorpusId
75 searchCountInCorpus cId t q = runCountOpaQuery
80 queryInCorpus :: CorpusId
83 -> O.Query FacetDocRead
84 queryInCorpus cId t q = proc () -> do
85 (n, nn) <- joinInCorpus -< ()
86 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
88 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
89 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
90 restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
91 restrict -< (n ^. ns_typename ) .== (pgInt4 $ nodeTypeId NodeDocument)
92 returnA -< FacetDoc (n^.ns_id )
99 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
100 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
102 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
103 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
105 ------------------------------------------------------------------------
106 type AuthorName = Text
108 -- | TODO Optim: Offset and Limit in the Query
109 -- TODO-SECURITY check
110 searchInCorpusWithContacts
117 -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
118 searchInCorpusWithContacts cId lId q o l order =
119 take (maybe 10 identity l)
120 <$> drop (maybe 0 identity o)
121 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
122 <$> toList <$> fromListWith (<>)
123 <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
124 , catMaybes [Pair <$> p1 <*> p2]
127 <$> searchInCorpusWithContacts' cId lId q o l order
129 -- TODO-SECURITY check
130 searchInCorpusWithContacts'
137 -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
138 searchInCorpusWithContacts' cId lId q o l order =
139 runOpaQuery $ queryInCorpusWithContacts cId lId o l order
144 queryInCorpusWithContacts
151 -> O.Query FacetPairedRead
152 queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
153 (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
154 restrict -< (n^.ns_search) @@ (pgTSQuery $ unpack q )
155 restrict -< (n^.ns_typename) .== (pgInt4 $ nodeTypeId NodeDocument)
156 -- restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
157 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
158 -- -- restrict -< (nng_listType nng) .== (toNullable $ pgNgramsType Authors)
159 -- restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
160 -- -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
161 returnA -< FacetPaired (n^.ns_id)
165 (contacts^.node_id, ngrams'^.ngrams_terms)
167 joinInCorpusWithContacts :: O.Query ( NodeSearchRead
169 , ( NodeNodeNgramsReadNull
171 , ( NodeNodeNgramsReadNull
178 joinInCorpusWithContacts =
181 queryNodeNodeNgramsTable
183 queryNodeNodeNgramsTable
192 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
193 cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
195 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
196 cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id
198 cond34 :: ( NodeNodeNgramsRead
200 , ( NodeNodeNgramsReadNull
205 cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
207 cond45 :: ( NodeNodeRead
208 , ( NodeNodeNgramsRead
210 , ( NodeNodeNgramsReadNull
216 cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
218 cond56 :: ( NodeSearchRead
220 , ( NodeNodeNgramsReadNull
222 , ( NodeNodeNgramsReadNull
229 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
232 newtype TSQuery = UnsafeTSQuery [Text]
234 -- | TODO [""] -> panic "error"
235 toTSQuery :: [Text] -> TSQuery
236 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
239 instance IsString TSQuery
241 fromString = UnsafeTSQuery . words . cs
244 instance ToField TSQuery
246 toField (UnsafeTSQuery xs)
247 = Many $ intersperse (Plain " && ")
248 $ map (\q -> Many [ Plain "plainto_tsquery("
254 data Order = Asc | Desc
256 instance ToField Order
258 toField Asc = Plain "ASC"
259 toField Desc = Plain "DESC"
265 textSearchQuery :: Query
266 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
267 \ , n.hyperdata->'title' \
268 \ , n.hyperdata->'source' \
269 \ , n.hyperdata->'authors' \
270 \ , COALESCE(nn.score,null) \
272 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
274 \ n.search @@ (?::tsquery) \
275 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
276 \ AND n.typename = ? \
277 \ ORDER BY n.hyperdata -> 'publication_date' ? \
280 -- | Text Search Function for Master Corpus
281 -- TODO : text search for user corpus
283 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
284 -- textSearchTest pId q = textSearch q pId 5 0 Asc
285 textSearch :: TSQuery -> ParentId
286 -> Limit -> Offset -> Order
287 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
288 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
290 typeId = nodeTypeId NodeDocument