]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[Database][Search] semantics ok.
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14
15 module Gargantext.Database.TextSearch where
16
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.String (IsString(..))
20 import Data.Text (Text, words, unpack)
21 import Database.PostgreSQL.Simple -- (Query, Connection)
22 import Database.PostgreSQL.Simple.ToField
23 import Gargantext.Database.Config (nodeTypeId)
24 import Gargantext.Database.Types.Node (NodeType(..))
25 import Gargantext.Prelude
26 import Gargantext.Database.Node.Contact
27 import Gargantext.Database.Schema.Node
28 import Gargantext.Database.Schema.Ngrams
29 import Gargantext.Database.Schema.NodeNode
30 import Gargantext.Database.Schema.NodeNgram
31 import Gargantext.Database.Queries.Join (leftJoin6, leftJoin3')
32 import Gargantext.Core.Types
33 import Control.Arrow (returnA)
34 import qualified Opaleye as O hiding (Order)
35 import Opaleye hiding (Query, Order)
36
37
38 ------------------------------------------------------------------------
39 searchInDatabase :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
40 searchInDatabase c p t = runQuery c (queryInDatabase p t)
41
42 -- | Global search query where ParentId is Master Node Corpus Id
43 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
44 queryInDatabase _ q = proc () -> do
45 row <- queryNodeSearchTable -< ()
46 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
47 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
48 returnA -< (_ns_id row, _ns_hyperdata row)
49
50 ------------------------------------------------------------------------
51 -- | todo add limit and offset and order
52 searchInCorpus :: Connection -> CorpusId -> Text -> IO [(NodeId, HyperdataDocument)]
53 searchInCorpus c cId q = runQuery c (queryInCorpus cId q)
54
55 queryInCorpus :: CorpusId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
56 queryInCorpus cId q = proc () -> do
57 (n, nn) <- joinInCorpus -< ()
58 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
59 restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
60 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
61 returnA -< (_ns_id n, _ns_hyperdata n)
62
63 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
64 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
65 where
66 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
67 cond (n, nn) = nodeNode_node2_id nn .== _ns_id n
68
69 ------------------------------------------------------------------------
70 type AuthorName = Text
71
72 searchInCorpusWithContacts :: Connection -> CorpusId -> Text -> IO [((Int, HyperdataDocument),(ContactId, Maybe AuthorName))]
73 searchInCorpusWithContacts c cId q = runQuery c $ queryInCorpusWithContacts cId q
74
75 queryInCorpusWithContacts :: CorpusId -> Text -> O.Query ((Column PGInt4, Column PGJsonb), (Column (PGInt4), Column (Nullable PGText)))
76 queryInCorpusWithContacts cId q = proc () -> do
77 (docs, (corpusDoc, (docNgrams, (ngrams, (ngramsContact, contacts))))) <- joinInCorpusWithContacts -< ()
78 restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
79 restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
80 restrict -< (nodeNode_node1_id corpusDoc) .== (toNullable $ pgInt4 cId)
81 restrict -< (nodeNgram_type docNgrams) .== (toNullable $ pgInt4 $ ngramsTypeId Authors)
82 restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
83 -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
84 returnA -< ((_ns_id docs, _ns_hyperdata docs),(fromNullable (pgInt4 0) (_node_id contacts), ngrams_terms ngrams))
85
86 joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
87 joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
88 where
89 cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
90 cond12 (ng3, n2) = _node_id n2 .== nodeNgram_node_id ng3
91 ---------
92 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
93 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
94
95 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
96 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
97
98 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
99 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
100
101 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
102 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
103
104
105 {-
106 queryGraphCorpusAuthors' :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, NgramsReadNull)))
107 queryGraphCorpusAuthors' = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34
108 where
109 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
110 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
111
112 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
113 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
114
115 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
116 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
117
118 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
119 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
120 -}
121
122
123
124 newtype TSQuery = UnsafeTSQuery [Text]
125
126 -- | TODO [""] -> panic "error"
127 toTSQuery :: [Text] -> TSQuery
128 toTSQuery txt = UnsafeTSQuery txt
129
130
131 instance IsString TSQuery
132 where
133 fromString = UnsafeTSQuery . words . cs
134
135
136 instance ToField TSQuery
137 where
138 toField (UnsafeTSQuery xs)
139 = Many $ intersperse (Plain " && ")
140 $ map (\q -> Many [ Plain "plainto_tsquery("
141 , Escape (cs q)
142 , Plain ")"
143 ]
144 ) xs
145
146 data Order = Asc | Desc
147
148 instance ToField Order
149 where
150 toField Asc = Plain "ASC"
151 toField Desc = Plain "DESC"
152
153 -- TODO
154 -- FIX fav
155 -- ADD ngrams count
156 -- TESTS
157 textSearchQuery :: Query
158 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
159 \ , n.hyperdata->'title' \
160 \ , n.hyperdata->'source' \
161 \ , n.hyperdata->'authors' \
162 \ , COALESCE(nn.score,null) \
163 \ FROM nodes n \
164 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
165 \ WHERE \
166 \ n.search @@ (?::tsquery) \
167 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
168 \ AND n.typename = ? \
169 \ ORDER BY n.hyperdata -> 'publication_date' ? \
170 \ offset ? limit ?;"
171
172 -- | Text Search Function for Master Corpus
173 -- TODO : text search for user corpus
174 -- Example:
175 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
176 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
177 textSearch :: Connection
178 -> TSQuery -> ParentId
179 -> Limit -> Offset -> Order
180 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
181 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
182 where
183 typeId = nodeTypeId NodeDocument
184
185