]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[Query] Search in Corpus To FacetDoc.
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14
15 module Gargantext.Database.TextSearch where
16
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.String (IsString(..))
20 import Data.Text (Text, words, unpack, intercalate)
21 import Data.Time (UTCTime)
22 import Database.PostgreSQL.Simple -- (Query, Connection)
23 import Database.PostgreSQL.Simple.ToField
24 import Gargantext.Database.Config (nodeTypeId)
25 import Gargantext.Database.Types.Node (NodeType(..))
26 import Gargantext.Prelude
27 --import Gargantext.Database.Node.Contact
28 import Gargantext.Database.Facet
29 import Gargantext.Database.Schema.Node
30 import Gargantext.Database.Schema.Ngrams
31 import Gargantext.Database.Schema.NodeNode
32 import Gargantext.Database.Schema.NodeNgram
33 import Gargantext.Database.Queries.Join (leftJoin6)
34 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
35 import Gargantext.Core.Types
36 import Control.Arrow (returnA)
37 import qualified Opaleye as O hiding (Order)
38 import Opaleye hiding (Query, Order)
39
40
41 ------------------------------------------------------------------------
42 searchInDatabase :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
43 searchInDatabase c p t = runQuery c (queryInDatabase p t)
44
45 -- | Global search query where ParentId is Master Node Corpus Id
46 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
47 queryInDatabase _ q = proc () -> do
48 row <- queryNodeSearchTable -< ()
49 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
50 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
51 returnA -< (_ns_id row, _ns_hyperdata row)
52
53 ------------------------------------------------------------------------
54 -- | todo add limit and offset and order
55 searchInCorpus :: Connection -> CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> IO [FacetDoc]
56 searchInCorpus c cId q o l order = runQuery c (filterWith o l order $ queryInCorpus cId q')
57 where
58 q' = intercalate " || " $ map stemIt q
59
60 queryInCorpus :: CorpusId -> Text -> O.Query FacetDocRead
61 queryInCorpus cId q = proc () -> do
62 (n, nn) <- joinInCorpus -< ()
63 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
64 restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
65 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
66 returnA -< FacetDoc (_ns_id n) (_ns_date n) (_ns_name n) (_ns_hyperdata n) (pgBool True) (pgInt4 1)
67
68 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
69 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
70 where
71 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
72 cond (n, nn) = nodeNode_node2_id nn .== _ns_id n
73
74 ------------------------------------------------------------------------
75 type AuthorName = Text
76
77 searchInCorpusWithContacts :: Connection -> CorpusId -> Text -> IO [((Int, HyperdataDocument),(ContactId, Maybe AuthorName))]
78 searchInCorpusWithContacts c cId q = runQuery c $ queryInCorpusWithContacts cId q
79
80 queryInCorpusWithContacts :: CorpusId -> Text -> O.Query ((Column PGInt4, Column PGJsonb), (Column (PGInt4), Column (Nullable PGText)))
81 queryInCorpusWithContacts cId q = proc () -> do
82 (docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
83 restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
84 restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
85 restrict -< (nodeNode_node1_id corpusDoc) .== (toNullable $ pgInt4 cId)
86 restrict -< (nodeNgram_type docNgrams) .== (toNullable $ pgInt4 $ ngramsTypeId Authors)
87 restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
88 -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
89 returnA -< ((_ns_id docs, _ns_hyperdata docs),(fromNullable (pgInt4 0) (_node_id contacts), ngrams_terms ngrams'))
90
91 joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
92 joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
93 where
94 cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
95 cond12 (ng3, n2) = _node_id n2 .== nodeNgram_node_id ng3
96 ---------
97 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
98 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
99
100 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
101 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
102
103 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
104 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
105
106 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
107 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
108
109
110 {-
111 queryGraphCorpusAuthors' :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, NgramsReadNull)))
112 queryGraphCorpusAuthors' = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34
113 where
114 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
115 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
116
117 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
118 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
119
120 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
121 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
122
123 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
124 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
125 -}
126
127
128
129 newtype TSQuery = UnsafeTSQuery [Text]
130
131 -- | TODO [""] -> panic "error"
132 toTSQuery :: [Text] -> TSQuery
133 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
134
135
136 instance IsString TSQuery
137 where
138 fromString = UnsafeTSQuery . words . cs
139
140
141 instance ToField TSQuery
142 where
143 toField (UnsafeTSQuery xs)
144 = Many $ intersperse (Plain " && ")
145 $ map (\q -> Many [ Plain "plainto_tsquery("
146 , Escape (cs q)
147 , Plain ")"
148 ]
149 ) xs
150
151 data Order = Asc | Desc
152
153 instance ToField Order
154 where
155 toField Asc = Plain "ASC"
156 toField Desc = Plain "DESC"
157
158 -- TODO
159 -- FIX fav
160 -- ADD ngrams count
161 -- TESTS
162 textSearchQuery :: Query
163 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
164 \ , n.hyperdata->'title' \
165 \ , n.hyperdata->'source' \
166 \ , n.hyperdata->'authors' \
167 \ , COALESCE(nn.score,null) \
168 \ FROM nodes n \
169 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
170 \ WHERE \
171 \ n.search @@ (?::tsquery) \
172 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
173 \ AND n.typename = ? \
174 \ ORDER BY n.hyperdata -> 'publication_date' ? \
175 \ offset ? limit ?;"
176
177 -- | Text Search Function for Master Corpus
178 -- TODO : text search for user corpus
179 -- Example:
180 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
181 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
182 textSearch :: Connection
183 -> TSQuery -> ParentId
184 -> Limit -> Offset -> Order
185 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
186 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
187 where
188 typeId = nodeTypeId NodeDocument
189
190