]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[API] Graph with metadata.
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14
15 module Gargantext.Database.TextSearch where
16
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.String (IsString(..))
20 import Data.Text (Text, words, unpack)
21 import Database.PostgreSQL.Simple -- (Query, Connection)
22 import Database.PostgreSQL.Simple.ToField
23 import Gargantext.Database.Config (nodeTypeId)
24 import Gargantext.Database.Types.Node (NodeType(..))
25 import Gargantext.Prelude
26 --import Gargantext.Database.Node.Contact
27 import Gargantext.Database.Schema.Node
28 import Gargantext.Database.Schema.Ngrams
29 import Gargantext.Database.Schema.NodeNode
30 import Gargantext.Database.Schema.NodeNgram
31 import Gargantext.Database.Queries.Join (leftJoin6)
32 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
33 import Gargantext.Core.Types
34 import Control.Arrow (returnA)
35 import qualified Opaleye as O hiding (Order)
36 import Opaleye hiding (Query, Order)
37
38
39 ------------------------------------------------------------------------
40 searchInDatabase :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
41 searchInDatabase c p t = runQuery c (queryInDatabase p t)
42
43 -- | Global search query where ParentId is Master Node Corpus Id
44 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
45 queryInDatabase _ q = proc () -> do
46 row <- queryNodeSearchTable -< ()
47 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
48 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
49 returnA -< (_ns_id row, _ns_hyperdata row)
50
51 ------------------------------------------------------------------------
52 -- | todo add limit and offset and order
53 searchInCorpus :: Connection -> CorpusId -> Text -> IO [(NodeId, HyperdataDocument)]
54 searchInCorpus c cId q = runQuery c (queryInCorpus cId q)
55
56 queryInCorpus :: CorpusId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
57 queryInCorpus cId q = proc () -> do
58 (n, nn) <- joinInCorpus -< ()
59 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
60 restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
61 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
62 returnA -< (_ns_id n, _ns_hyperdata n)
63
64 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
65 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
66 where
67 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
68 cond (n, nn) = nodeNode_node2_id nn .== _ns_id n
69
70 ------------------------------------------------------------------------
71 type AuthorName = Text
72
73 searchInCorpusWithContacts :: Connection -> CorpusId -> Text -> IO [((Int, HyperdataDocument),(ContactId, Maybe AuthorName))]
74 searchInCorpusWithContacts c cId q = runQuery c $ queryInCorpusWithContacts cId q
75
76 queryInCorpusWithContacts :: CorpusId -> Text -> O.Query ((Column PGInt4, Column PGJsonb), (Column (PGInt4), Column (Nullable PGText)))
77 queryInCorpusWithContacts cId q = proc () -> do
78 (docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
79 restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
80 restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
81 restrict -< (nodeNode_node1_id corpusDoc) .== (toNullable $ pgInt4 cId)
82 restrict -< (nodeNgram_type docNgrams) .== (toNullable $ pgInt4 $ ngramsTypeId Authors)
83 restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
84 -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
85 returnA -< ((_ns_id docs, _ns_hyperdata docs),(fromNullable (pgInt4 0) (_node_id contacts), ngrams_terms ngrams'))
86
87 joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
88 joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
89 where
90 cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
91 cond12 (ng3, n2) = _node_id n2 .== nodeNgram_node_id ng3
92 ---------
93 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
94 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
95
96 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
97 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
98
99 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
100 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
101
102 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
103 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
104
105
106 {-
107 queryGraphCorpusAuthors' :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, NgramsReadNull)))
108 queryGraphCorpusAuthors' = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34
109 where
110 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
111 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
112
113 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
114 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
115
116 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
117 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
118
119 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
120 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
121 -}
122
123
124
125 newtype TSQuery = UnsafeTSQuery [Text]
126
127 -- | TODO [""] -> panic "error"
128 toTSQuery :: [Text] -> TSQuery
129 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
130
131
132 instance IsString TSQuery
133 where
134 fromString = UnsafeTSQuery . words . cs
135
136
137 instance ToField TSQuery
138 where
139 toField (UnsafeTSQuery xs)
140 = Many $ intersperse (Plain " && ")
141 $ map (\q -> Many [ Plain "plainto_tsquery("
142 , Escape (cs q)
143 , Plain ")"
144 ]
145 ) xs
146
147 data Order = Asc | Desc
148
149 instance ToField Order
150 where
151 toField Asc = Plain "ASC"
152 toField Desc = Plain "DESC"
153
154 -- TODO
155 -- FIX fav
156 -- ADD ngrams count
157 -- TESTS
158 textSearchQuery :: Query
159 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
160 \ , n.hyperdata->'title' \
161 \ , n.hyperdata->'source' \
162 \ , n.hyperdata->'authors' \
163 \ , COALESCE(nn.score,null) \
164 \ FROM nodes n \
165 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
166 \ WHERE \
167 \ n.search @@ (?::tsquery) \
168 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
169 \ AND n.typename = ? \
170 \ ORDER BY n.hyperdata -> 'publication_date' ? \
171 \ offset ? limit ?;"
172
173 -- | Text Search Function for Master Corpus
174 -- TODO : text search for user corpus
175 -- Example:
176 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
177 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
178 textSearch :: Connection
179 -> TSQuery -> ParentId
180 -> Limit -> Offset -> Order
181 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
182 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
183 where
184 typeId = nodeTypeId NodeDocument
185
186