]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[Graph Search] union of pairs.
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14
15 module Gargantext.Database.TextSearch where
16
17 import Data.Aeson
18 import Data.Map.Strict hiding (map)
19 import Data.Maybe
20 import Data.List (intersperse)
21 import Data.String (IsString(..))
22 import Data.Text (Text, words, unpack, intercalate)
23 import Data.Time (UTCTime)
24 import Database.PostgreSQL.Simple -- (Query, Connection)
25 import Database.PostgreSQL.Simple.ToField
26 import Gargantext.Database.Config (nodeTypeId)
27 import Gargantext.Database.Types.Node (NodeType(..))
28 import Gargantext.Prelude
29 --import Gargantext.Database.Node.Contact
30 import Gargantext.Database.Facet
31 import Gargantext.Database.Schema.Node
32 import Gargantext.Database.Schema.Ngrams
33 import Gargantext.Database.Schema.NodeNode
34 import Gargantext.Database.Schema.NodeNgram
35 import Gargantext.Database.Queries.Join (leftJoin6)
36 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
37 import Gargantext.Core.Types
38 import Control.Arrow (returnA)
39 import qualified Opaleye as O hiding (Order)
40 import Opaleye hiding (Query, Order)
41
42
43 ------------------------------------------------------------------------
44 searchInDatabase :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
45 searchInDatabase c p t = runQuery c (queryInDatabase p t)
46
47 -- | Global search query where ParentId is Master Node Corpus Id
48 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
49 queryInDatabase _ q = proc () -> do
50 row <- queryNodeSearchTable -< ()
51 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
52 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
53 returnA -< (_ns_id row, _ns_hyperdata row)
54
55 ------------------------------------------------------------------------
56 -- | todo add limit and offset and order
57 searchInCorpus :: Connection -> CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> IO [FacetDoc]
58 searchInCorpus c cId q o l order = runQuery c (filterWith o l order $ queryInCorpus cId q')
59 where
60 q' = intercalate " || " $ map stemIt q
61
62 queryInCorpus :: CorpusId -> Text -> O.Query FacetDocRead
63 queryInCorpus cId q = proc () -> do
64 (n, nn) <- joinInCorpus -< ()
65 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
66 restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
67 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
68 returnA -< FacetDoc (_ns_id n) (_ns_date n) (_ns_name n) (_ns_hyperdata n) (pgBool True) (pgInt4 1)
69
70 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
71 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
72 where
73 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
74 cond (n, nn) = nodeNode_node2_id nn .== _ns_id n
75
76 ------------------------------------------------------------------------
77 type AuthorName = Text
78
79 searchInCorpusWithContacts :: Connection -> CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> IO [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
80 searchInCorpusWithContacts c cId q o l order = map (\((i,u,h,s), ps) -> FacetPaired i u h s (catMaybes ps)) <$> toList <$> fromListWith (<>) <$> map (\(FacetPaired i u h s p) -> ((i,u,h,s), [maybePair p])) <$> searchInCorpusWithContacts' c cId q o l order
81 where
82 maybePair (Pair Nothing Nothing) = Nothing
83 maybePair (Pair _ Nothing) = Nothing
84 maybePair (Pair Nothing _) = Nothing
85 maybePair (Pair (Just i) (Just l)) = Just $ Pair i l
86
87 searchInCorpusWithContacts' :: Connection -> CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> IO [(FacetPaired Int UTCTime HyperdataDocument Int (Pair (Maybe Int) (Maybe Text)))]
88 searchInCorpusWithContacts' c cId q o l order = runQuery c $ queryInCorpusWithContacts cId q' o l order
89 where
90 q' = intercalate " || " $ map stemIt q
91
92
93
94 queryInCorpusWithContacts :: CorpusId -> Text -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> O.Query FacetPairedRead
95 queryInCorpusWithContacts cId q _ _ _ = proc () -> do
96 (docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
97 restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
98 restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
99 restrict -< (nodeNode_node1_id corpusDoc) .== (toNullable $ pgInt4 cId)
100 restrict -< (nodeNgram_type docNgrams) .== (toNullable $ pgInt4 $ ngramsTypeId Authors)
101 restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
102 -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
103 returnA -< FacetPaired (_ns_id docs) (_ns_date docs) (_ns_hyperdata docs) (pgInt4 0) (Pair (_node_id contacts) (ngrams_terms ngrams'))
104
105 joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
106 joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
107 where
108 cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
109 cond12 (ng3, n2) = _node_id n2 .== nodeNgram_node_id ng3
110 ---------
111 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
112 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
113
114 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
115 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
116
117 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
118 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
119
120 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
121 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
122
123
124 {-
125 queryGraphCorpusAuthors' :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, NgramsReadNull)))
126 queryGraphCorpusAuthors' = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34
127 where
128 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
129 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
130
131 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
132 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
133
134 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
135 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
136
137 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
138 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
139 -}
140
141
142
143 newtype TSQuery = UnsafeTSQuery [Text]
144
145 -- | TODO [""] -> panic "error"
146 toTSQuery :: [Text] -> TSQuery
147 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
148
149
150 instance IsString TSQuery
151 where
152 fromString = UnsafeTSQuery . words . cs
153
154
155 instance ToField TSQuery
156 where
157 toField (UnsafeTSQuery xs)
158 = Many $ intersperse (Plain " && ")
159 $ map (\q -> Many [ Plain "plainto_tsquery("
160 , Escape (cs q)
161 , Plain ")"
162 ]
163 ) xs
164
165 data Order = Asc | Desc
166
167 instance ToField Order
168 where
169 toField Asc = Plain "ASC"
170 toField Desc = Plain "DESC"
171
172 -- TODO
173 -- FIX fav
174 -- ADD ngrams count
175 -- TESTS
176 textSearchQuery :: Query
177 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
178 \ , n.hyperdata->'title' \
179 \ , n.hyperdata->'source' \
180 \ , n.hyperdata->'authors' \
181 \ , COALESCE(nn.score,null) \
182 \ FROM nodes n \
183 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
184 \ WHERE \
185 \ n.search @@ (?::tsquery) \
186 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
187 \ AND n.typename = ? \
188 \ ORDER BY n.hyperdata -> 'publication_date' ? \
189 \ offset ? limit ?;"
190
191 -- | Text Search Function for Master Corpus
192 -- TODO : text search for user corpus
193 -- Example:
194 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
195 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
196 textSearch :: Connection
197 -> TSQuery -> ParentId
198 -> Limit -> Offset -> Order
199 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
200 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
201 where
202 typeId = nodeTypeId NodeDocument
203
204