]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[GRAPH][Search] Query -> FacetDoc.
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14
15 module Gargantext.Database.TextSearch where
16
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.String (IsString(..))
20 import Data.Text (Text, words, unpack)
21 import Data.Time (UTCTime)
22 import Database.PostgreSQL.Simple -- (Query, Connection)
23 import Database.PostgreSQL.Simple.ToField
24 import Gargantext.Database.Config (nodeTypeId)
25 import Gargantext.Database.Types.Node (NodeType(..))
26 import Gargantext.Prelude
27 --import Gargantext.Database.Node.Contact
28 import Gargantext.Database.Facet
29 import Gargantext.Database.Schema.Node
30 import Gargantext.Database.Schema.Ngrams
31 import Gargantext.Database.Schema.NodeNode
32 import Gargantext.Database.Schema.NodeNgram
33 import Gargantext.Database.Queries.Join (leftJoin6)
34 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
35 import Gargantext.Core.Types
36 import Control.Arrow (returnA)
37 import qualified Opaleye as O hiding (Order)
38 import Opaleye hiding (Query, Order)
39
40
41 ------------------------------------------------------------------------
42 searchInDatabase :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
43 searchInDatabase c p t = runQuery c (queryInDatabase p t)
44
45 -- | Global search query where ParentId is Master Node Corpus Id
46 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
47 queryInDatabase _ q = proc () -> do
48 row <- queryNodeSearchTable -< ()
49 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
50 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
51 returnA -< (_ns_id row, _ns_hyperdata row)
52
53 ------------------------------------------------------------------------
54 -- | todo add limit and offset and order
55 searchInCorpus :: Connection -> CorpusId -> Text -> IO [FacetDoc]
56 searchInCorpus c cId q = map toFacet <$> runQuery'
57 where
58 toFacet (nId, d, h) = FacetDoc nId d (maybe "Empty Title" identity $_hyperdataDocument_title h) h True 0
59 runQuery' :: IO [(Int, UTCTime, HyperdataDocument)]
60 runQuery' = runQuery c (queryInCorpus cId q)
61
62 queryInCorpus :: CorpusId -> Text -> O.Query (Column PGInt4, Column PGTimestamptz, Column PGJsonb)
63 queryInCorpus cId q = proc () -> do
64 (n, nn) <- joinInCorpus -< ()
65 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
66 restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
67 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
68 returnA -< (_ns_id n, _ns_date n, _ns_hyperdata n)
69
70 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
71 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
72 where
73 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
74 cond (n, nn) = nodeNode_node2_id nn .== _ns_id n
75
76 ------------------------------------------------------------------------
77 type AuthorName = Text
78
79 searchInCorpusWithContacts :: Connection -> CorpusId -> Text -> IO [((Int, HyperdataDocument),(ContactId, Maybe AuthorName))]
80 searchInCorpusWithContacts c cId q = runQuery c $ queryInCorpusWithContacts cId q
81
82 queryInCorpusWithContacts :: CorpusId -> Text -> O.Query ((Column PGInt4, Column PGJsonb), (Column (PGInt4), Column (Nullable PGText)))
83 queryInCorpusWithContacts cId q = proc () -> do
84 (docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
85 restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
86 restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
87 restrict -< (nodeNode_node1_id corpusDoc) .== (toNullable $ pgInt4 cId)
88 restrict -< (nodeNgram_type docNgrams) .== (toNullable $ pgInt4 $ ngramsTypeId Authors)
89 restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
90 -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
91 returnA -< ((_ns_id docs, _ns_hyperdata docs),(fromNullable (pgInt4 0) (_node_id contacts), ngrams_terms ngrams'))
92
93 joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
94 joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
95 where
96 cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
97 cond12 (ng3, n2) = _node_id n2 .== nodeNgram_node_id ng3
98 ---------
99 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
100 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
101
102 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
103 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
104
105 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
106 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
107
108 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
109 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
110
111
112 {-
113 queryGraphCorpusAuthors' :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, NgramsReadNull)))
114 queryGraphCorpusAuthors' = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34
115 where
116 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
117 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
118
119 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
120 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
121
122 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
123 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
124
125 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
126 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
127 -}
128
129
130
131 newtype TSQuery = UnsafeTSQuery [Text]
132
133 -- | TODO [""] -> panic "error"
134 toTSQuery :: [Text] -> TSQuery
135 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
136
137
138 instance IsString TSQuery
139 where
140 fromString = UnsafeTSQuery . words . cs
141
142
143 instance ToField TSQuery
144 where
145 toField (UnsafeTSQuery xs)
146 = Many $ intersperse (Plain " && ")
147 $ map (\q -> Many [ Plain "plainto_tsquery("
148 , Escape (cs q)
149 , Plain ")"
150 ]
151 ) xs
152
153 data Order = Asc | Desc
154
155 instance ToField Order
156 where
157 toField Asc = Plain "ASC"
158 toField Desc = Plain "DESC"
159
160 -- TODO
161 -- FIX fav
162 -- ADD ngrams count
163 -- TESTS
164 textSearchQuery :: Query
165 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
166 \ , n.hyperdata->'title' \
167 \ , n.hyperdata->'source' \
168 \ , n.hyperdata->'authors' \
169 \ , COALESCE(nn.score,null) \
170 \ FROM nodes n \
171 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
172 \ WHERE \
173 \ n.search @@ (?::tsquery) \
174 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
175 \ AND n.typename = ? \
176 \ ORDER BY n.hyperdata -> 'publication_date' ? \
177 \ offset ? limit ?;"
178
179 -- | Text Search Function for Master Corpus
180 -- TODO : text search for user corpus
181 -- Example:
182 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
183 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
184 textSearch :: Connection
185 -> TSQuery -> ParentId
186 -> Limit -> Offset -> Order
187 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
188 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
189 where
190 typeId = nodeTypeId NodeDocument
191
192