]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[WIP] G.P.Utils.shuffle + maxClique.
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15
16 module Gargantext.Database.TextSearch where
17
18 import Data.Aeson
19 import Data.Map.Strict hiding (map, drop, take)
20 import Data.Maybe
21 import Data.List (intersperse, take, drop)
22 import Data.String (IsString(..))
23 import Data.Text (Text, words, unpack, intercalate)
24 import Data.Time (UTCTime)
25 import Database.PostgreSQL.Simple (Query)
26 import Database.PostgreSQL.Simple.ToField
27 import Gargantext.Database.Config (nodeTypeId)
28 import Gargantext.Database.Types.Node (NodeType(..))
29 import Gargantext.Prelude
30 --import Gargantext.Database.Node.Contact
31 import Gargantext.Database.Facet
32 import Gargantext.Database.Schema.Node
33 import Gargantext.Database.Schema.Ngrams
34 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
35 import Gargantext.Database.Schema.NodeNodeNgrams
36 import Gargantext.Database.Queries.Join (leftJoin6)
37 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery)
38 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
39 import Gargantext.Core.Types
40 import Control.Arrow (returnA)
41 import qualified Opaleye as O hiding (Order)
42 import Opaleye hiding (Query, Order)
43
44
45 ------------------------------------------------------------------------
46 searchInDatabase :: ParentId -> Text -> Cmd err [(NodeId, HyperdataDocument)]
47 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
48
49 -- | Global search query where ParentId is Master Node Corpus Id
50 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
51 queryInDatabase _ q = proc () -> do
52 row <- queryNodeSearchTable -< ()
53 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
54 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
55 returnA -< (_ns_id row, _ns_hyperdata row)
56
57 ------------------------------------------------------------------------
58 -- | todo add limit and offset and order
59 searchInCorpus :: CorpusId -> IsTrash -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
60 searchInCorpus cId t q o l order = runOpaQuery (filterWith o l order $ queryInCorpus cId t q')
61 where
62 q' = intercalate " | " $ map stemIt q
63
64 queryInCorpus :: CorpusId -> IsTrash -> Text -> O.Query FacetDocRead
65 queryInCorpus cId t q = proc () -> do
66 (n, nn) <- joinInCorpus -< ()
67 restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId)
68 restrict -< if t
69 then ( nn_category nn) .== (toNullable $ pgInt4 0)
70 else ( nn_category nn) .>= (toNullable $ pgInt4 1)
71 restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
72 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
73 returnA -< FacetDoc (_ns_id n) (_ns_date n) (_ns_name n) (_ns_hyperdata n) (nn_category nn) (nn_score nn)
74
75 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
76 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
77 where
78 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
79 cond (n, nn) = nn_node2_id nn .== _ns_id n
80
81 ------------------------------------------------------------------------
82 type AuthorName = Text
83
84 -- | TODO Optim: Offset and Limit in the Query
85 -- TODO-SECURITY check
86 searchInCorpusWithContacts :: CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
87 searchInCorpusWithContacts cId q o l order = take (maybe 5 identity l) <$> drop (maybe 0 identity o)
88 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s (catMaybes ps))
89 <$> toList <$> fromListWith (<>)
90 <$> map (\(FacetPaired i u h s p) -> ((i,u,h,s), [maybePair p]))
91 <$> searchInCorpusWithContacts' cId q o l order
92 where
93 maybePair (Pair Nothing Nothing) = Nothing
94 maybePair (Pair _ Nothing) = Nothing
95 maybePair (Pair Nothing _) = Nothing
96 maybePair (Pair (Just p_id) (Just p_label)) = Just $ Pair p_id p_label
97
98 -- TODO-SECURITY check
99 searchInCorpusWithContacts' :: CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Pair (Maybe Int) (Maybe Text)))]
100 searchInCorpusWithContacts' cId q o l order = runOpaQuery $ queryInCorpusWithContacts cId q' o l order
101 where
102 q' = intercalate " | " $ map stemIt q
103
104
105
106 queryInCorpusWithContacts :: CorpusId -> Text -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> O.Query FacetPairedRead
107 queryInCorpusWithContacts cId q _ _ _ = proc () -> do
108 (docs, (corpusDoc, (_docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
109 restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
110 restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
111 restrict -< (nn_node1_id corpusDoc) .== (toNullable $ pgNodeId cId)
112 -- restrict -< (nng_listType docNgrams) .== (toNullable $ pgNgramsType Authors)
113 restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
114 -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
115 returnA -< FacetPaired (_ns_id docs) (_ns_date docs) (_ns_hyperdata docs) (pgInt4 0) (Pair (_node_id contacts) (ngrams_terms ngrams'))
116
117 joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNodeNgramsReadNull, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull)))))
118 joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNodeNgramsTable queryNgramsTable queryNodeNodeNgramsTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
119 where
120 cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
121 cond12 (ng3, n2) = _node_id n2 .== nnng_node1_id ng3
122 ---------
123 cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
124 cond23 (ng2, (nnng2, _)) = nnng_ngrams_id nnng2 .== ngrams_id ng2
125
126 cond34 :: (NodeNodeNgramsRead, (NgramsRead, (NodeNodeNgramsReadNull, NodeReadNull))) -> Column PGBool
127 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nnng_ngrams_id nng
128
129 cond45 :: (NodeNodeRead, (NodeNodeNgramsRead, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull)))) -> Column PGBool
130 cond45 (nn, (nng, (_,(_,_)))) = nnng_node1_id nng .== nn_node2_id nn
131
132 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNodeNgramsReadNull, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull))))) -> Column PGBool
133 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn_node2_id nn
134
135
136
137
138 newtype TSQuery = UnsafeTSQuery [Text]
139
140 -- | TODO [""] -> panic "error"
141 toTSQuery :: [Text] -> TSQuery
142 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
143
144
145 instance IsString TSQuery
146 where
147 fromString = UnsafeTSQuery . words . cs
148
149
150 instance ToField TSQuery
151 where
152 toField (UnsafeTSQuery xs)
153 = Many $ intersperse (Plain " && ")
154 $ map (\q -> Many [ Plain "plainto_tsquery("
155 , Escape (cs q)
156 , Plain ")"
157 ]
158 ) xs
159
160 data Order = Asc | Desc
161
162 instance ToField Order
163 where
164 toField Asc = Plain "ASC"
165 toField Desc = Plain "DESC"
166
167 -- TODO
168 -- FIX fav
169 -- ADD ngrams count
170 -- TESTS
171 textSearchQuery :: Query
172 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
173 \ , n.hyperdata->'title' \
174 \ , n.hyperdata->'source' \
175 \ , n.hyperdata->'authors' \
176 \ , COALESCE(nn.score,null) \
177 \ FROM nodes n \
178 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
179 \ WHERE \
180 \ n.search @@ (?::tsquery) \
181 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
182 \ AND n.typename = ? \
183 \ ORDER BY n.hyperdata -> 'publication_date' ? \
184 \ offset ? limit ?;"
185
186 -- | Text Search Function for Master Corpus
187 -- TODO : text search for user corpus
188 -- Example:
189 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
190 -- textSearchTest pId q = textSearch q pId 5 0 Asc
191 textSearch :: TSQuery -> ParentId
192 -> Limit -> Offset -> Order
193 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
194 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
195 where
196 typeId = nodeTypeId NodeDocument
197
198