]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[Database][Query] search for doc 2 authors
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14
15 module Gargantext.Database.TextSearch where
16
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.String (IsString(..))
20 import Data.Text (Text, words, unpack)
21 import Database.PostgreSQL.Simple -- (Query, Connection)
22 import Database.PostgreSQL.Simple.ToField
23 import Gargantext.Database.Config (nodeTypeId)
24 import Gargantext.Database.Types.Node (NodeType(..))
25 import Gargantext.Prelude
26 import Gargantext.Database.Node.Contact
27 import Gargantext.Database.Schema.Node
28 import Gargantext.Database.Schema.Ngrams
29 import Gargantext.Database.Schema.NodeNode
30 import Gargantext.Database.Schema.NodeNgram
31 import Gargantext.Database.Queries.Join (leftJoin6, leftJoin3')
32 import Gargantext.Core.Types
33 import Control.Arrow (returnA)
34 import qualified Opaleye as O hiding (Order)
35 import Opaleye hiding (Query, Order)
36
37 newtype TSQuery = UnsafeTSQuery [Text]
38
39 globalTextSearch :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
40 globalTextSearch c p t = runQuery c (globalTextSearchQuery p t)
41
42 -- | Global search query where ParentId is Master Node Corpus Id
43 globalTextSearchQuery :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
44 globalTextSearchQuery _ q = proc () -> do
45 row <- queryNodeSearchTable -< ()
46 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
47 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
48 returnA -< (_ns_id row, _ns_hyperdata row)
49
50 ------------------------------------------------------------------------
51 -- | todo add limit and offset and order
52 graphCorpusDocSearch :: CorpusId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
53 graphCorpusDocSearch cId q = proc () -> do
54 (n, nn) <- graphCorpusDocSearchQuery -< ()
55 restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
56 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
57 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
58 returnA -< (_ns_id n, _ns_hyperdata n)
59
60 graphCorpusDocSearchQuery :: O.Query (NodeSearchRead, NodeNodeReadNull)
61 graphCorpusDocSearchQuery = leftJoin queryNodeSearchTable queryNodeNodeTable cond
62 where
63 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
64 cond (n, nn) = nodeNode_node1_id nn .== _ns_id n
65
66
67 getGraphCorpusAuthors :: Connection -> CorpusId -> Text -> IO [((Int, HyperdataDocument),(Maybe Int, Maybe HyperdataContact))]
68 getGraphCorpusAuthors c cId q = runQuery c $ selectGraphCorpusAuthors' cId q
69
70 selectGraphCorpusAuthors' :: CorpusId -> Text -> O.Query ((Column PGInt4, Column PGJsonb),(Column (Nullable PGInt4), Column (Nullable PGJsonb)))
71 selectGraphCorpusAuthors' cId q = proc () -> do
72 (docs, (corpusDoc, (docNgrams, (ngrams, (ngramsContact, contacts))))) <- queryGraphCorpusAuthors' -< ()
73 restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
74 restrict -< (nodeNode_node1_id corpusDoc) .== (toNullable $ pgInt4 cId)
75 restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
76 restrict -< (nodeNgram_type docNgrams) .== (toNullable $ pgInt4 $ ngramsTypeId Authors)
77 restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
78 returnA -< ((_ns_id docs, _ns_hyperdata docs), (_node_id contacts, _node_hyperdata contacts))
79
80
81
82 -- | This query can be used to select document with Authors in Annuaire only
83 selectGraphCorpusAuthors :: CorpusId -> Text -> O.Query (Column (Nullable PGInt4), Column PGInt4, Column PGJsonb)
84 selectGraphCorpusAuthors cId q = proc () -> do
85 (contacts, (contactNgrams, (ngrams, (docNgrams, (corpusDoc, docSearch))))) <- queryGraphCorpusAuthors -< ()
86 restrict -< (_ns_search docSearch) @@ (pgTSQuery $ unpack q )
87 restrict -< (nodeNode_node1_id corpusDoc) .== (toNullable $ pgInt4 cId)
88 restrict -< (_ns_typename docSearch) .== (toNullable $ pgInt4 $ nodeTypeId NodeDocument)
89 restrict -< (nodeNgram_type docNgrams) .== (toNullable $ pgInt4 $ ngramsTypeId Authors)
90 restrict -< (_node_typename contacts) .== (pgInt4 $ nodeTypeId NodeContact)
91 returnA -< (_ns_id docSearch, _node_id contacts, _node_hyperdata contacts)
92 --returnA -< (_ns_id docSearch, _ns_name docSearch)
93
94
95 queryGraphCorpusAuthors :: O.Query (NodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, (NodeNodeReadNull, NodeSearchReadNull)))))
96 queryGraphCorpusAuthors = leftJoin6 queryNodeSearchTable queryNodeNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeTable cond12 cond23 cond34 cond45 cond56
97 where
98 cond12 :: (NodeNodeRead, NodeSearchRead) -> Column PGBool
99 cond12 (nn, n) = nodeNode_node2_id nn .== _ns_id n
100
101 cond23 :: (NodeNgramRead, (NodeNodeRead, NodeSearchReadNull)) -> Column PGBool
102 cond23 (nng, (nn, _)) = nodeNgram_node_id nng .== nodeNode_node2_id nn
103
104 cond34 :: (NgramsRead, (NodeNgramRead, (NodeNodeReadNull, NodeSearchReadNull))) -> Column PGBool
105 cond34 (ng, (nng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
106
107 cond45 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, (NodeNodeReadNull, NodeSearchReadNull)))) -> Column PGBool
108 cond45 (nng2, (ng2, (_,(_,_)))) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
109
110 cond56 :: (NodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, (NodeNodeReadNull, NodeSearchReadNull))))) -> Column PGBool
111 cond56 (n2, (ng3, (_,(_,(_,_))))) = _node_id n2 .== nodeNgram_node_id ng3
112
113
114 queryGraphCorpusAuthors' :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
115 queryGraphCorpusAuthors' = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
116 where
117 cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
118 cond12 (ng3, n2) = _node_id n2 .== nodeNgram_node_id ng3
119 ---------
120 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
121 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
122
123 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
124 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
125
126 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
127 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
128
129 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
130 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
131
132
133
134
135
136
137
138
139
140
141
142 -- | TODO [""] -> panic "error"
143 toTSQuery :: [Text] -> TSQuery
144 toTSQuery txt = UnsafeTSQuery txt
145
146
147 instance IsString TSQuery
148 where
149 fromString = UnsafeTSQuery . words . cs
150
151
152 instance ToField TSQuery
153 where
154 toField (UnsafeTSQuery xs)
155 = Many $ intersperse (Plain " && ")
156 $ map (\q -> Many [ Plain "plainto_tsquery("
157 , Escape (cs q)
158 , Plain ")"
159 ]
160 ) xs
161
162 data Order = Asc | Desc
163
164 instance ToField Order
165 where
166 toField Asc = Plain "ASC"
167 toField Desc = Plain "DESC"
168
169 -- TODO
170 -- FIX fav
171 -- ADD ngrams count
172 -- TESTS
173 textSearchQuery :: Query
174 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
175 \ , n.hyperdata->'title' \
176 \ , n.hyperdata->'source' \
177 \ , n.hyperdata->'authors' \
178 \ , COALESCE(nn.score,null) \
179 \ FROM nodes n \
180 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
181 \ WHERE \
182 \ n.search @@ (?::tsquery) \
183 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
184 \ AND n.typename = ? \
185 \ ORDER BY n.hyperdata -> 'publication_date' ? \
186 \ offset ? limit ?;"
187
188 -- | Text Search Function for Master Corpus
189 -- TODO : text search for user corpus
190 -- Example:
191 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
192 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
193 textSearch :: Connection
194 -> TSQuery -> ParentId
195 -> Limit -> Offset -> Order
196 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
197 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
198 where
199 typeId = nodeTypeId NodeDocument
200
201