]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
Typo
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14
15 module Gargantext.Database.TextSearch where
16
17 import Data.Aeson
18 import Data.Map.Strict hiding (map, drop, take)
19 import Data.Maybe
20 import Data.List (intersperse, take, drop)
21 import Data.String (IsString(..))
22 import Data.Text (Text, words, unpack, intercalate)
23 import Data.Time (UTCTime)
24 import Database.PostgreSQL.Simple -- (Query, Connection)
25 import Database.PostgreSQL.Simple.ToField
26 import Gargantext.Database.Config (nodeTypeId)
27 import Gargantext.Database.Types.Node (NodeType(..))
28 import Gargantext.Prelude
29 --import Gargantext.Database.Node.Contact
30 import Gargantext.Database.Facet
31 import Gargantext.Database.Schema.Node
32 import Gargantext.Database.Schema.Ngrams
33 import Gargantext.Database.Schema.NodeNode
34 import Gargantext.Database.Schema.NodeNgram
35 import Gargantext.Database.Queries.Join (leftJoin6)
36 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
37 import Gargantext.Core.Types
38 import Control.Arrow (returnA)
39 import qualified Opaleye as O hiding (Order)
40 import Opaleye hiding (Query, Order)
41
42
43 ------------------------------------------------------------------------
44 searchInDatabase :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
45 searchInDatabase c p t = runQuery c (queryInDatabase p t)
46
47 -- | Global search query where ParentId is Master Node Corpus Id
48 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
49 queryInDatabase _ q = proc () -> do
50 row <- queryNodeSearchTable -< ()
51 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
52 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
53 returnA -< (_ns_id row, _ns_hyperdata row)
54
55 ------------------------------------------------------------------------
56 -- | todo add limit and offset and order
57 searchInCorpus :: Connection -> CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> IO [FacetDoc]
58 searchInCorpus c cId q o l order = runQuery c (filterWith o l order $ queryInCorpus cId q')
59 where
60 q' = intercalate " | " $ map stemIt q
61
62 queryInCorpus :: CorpusId -> Text -> O.Query FacetDocRead
63 queryInCorpus cId q = proc () -> do
64 (n, nn) <- joinInCorpus -< ()
65 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
66 restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
67 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
68 returnA -< FacetDoc (_ns_id n) (_ns_date n) (_ns_name n) (_ns_hyperdata n) (pgBool True) (pgInt4 1)
69
70 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
71 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
72 where
73 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
74 cond (n, nn) = nodeNode_node2_id nn .== _ns_id n
75
76 ------------------------------------------------------------------------
77 type AuthorName = Text
78
79 -- | TODO Optim: Offset and Limit in the Query
80 searchInCorpusWithContacts :: Connection -> CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> IO [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
81 searchInCorpusWithContacts c cId q o l order = take (maybe 5 identity l) <$> drop (maybe 0 identity o)
82 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s (catMaybes ps))
83 <$> toList <$> fromListWith (<>)
84 <$> map (\(FacetPaired i u h s p) -> ((i,u,h,s), [maybePair p]))
85 <$> searchInCorpusWithContacts' c cId q o l order
86 where
87 maybePair (Pair Nothing Nothing) = Nothing
88 maybePair (Pair _ Nothing) = Nothing
89 maybePair (Pair Nothing _) = Nothing
90 maybePair (Pair (Just p_id) (Just p_label)) = Just $ Pair p_id p_label
91
92 searchInCorpusWithContacts' :: Connection -> CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> IO [(FacetPaired Int UTCTime HyperdataDocument Int (Pair (Maybe Int) (Maybe Text)))]
93 searchInCorpusWithContacts' c cId q o l order = runQuery c $ queryInCorpusWithContacts cId q' o l order
94 where
95 q' = intercalate " | " $ map stemIt q
96
97
98
99 queryInCorpusWithContacts :: CorpusId -> Text -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> O.Query FacetPairedRead
100 queryInCorpusWithContacts cId q _ _ _ = proc () -> do
101 (docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
102 restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
103 restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
104 restrict -< (nodeNode_node1_id corpusDoc) .== (toNullable $ pgInt4 cId)
105 restrict -< (nodeNgram_type docNgrams) .== (toNullable $ pgInt4 $ ngramsTypeId Authors)
106 restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
107 -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
108 returnA -< FacetPaired (_ns_id docs) (_ns_date docs) (_ns_hyperdata docs) (pgInt4 0) (Pair (_node_id contacts) (ngrams_terms ngrams'))
109
110 joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
111 joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
112 where
113 cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
114 cond12 (ng3, n2) = _node_id n2 .== nodeNgram_node_id ng3
115 ---------
116 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
117 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
118
119 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
120 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
121
122 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
123 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
124
125 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
126 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
127
128
129 {-
130 queryGraphCorpusAuthors' :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, NgramsReadNull)))
131 queryGraphCorpusAuthors' = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34
132 where
133 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
134 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
135
136 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
137 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
138
139 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
140 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
141
142 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
143 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
144 -}
145
146
147
148 newtype TSQuery = UnsafeTSQuery [Text]
149
150 -- | TODO [""] -> panic "error"
151 toTSQuery :: [Text] -> TSQuery
152 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
153
154
155 instance IsString TSQuery
156 where
157 fromString = UnsafeTSQuery . words . cs
158
159
160 instance ToField TSQuery
161 where
162 toField (UnsafeTSQuery xs)
163 = Many $ intersperse (Plain " && ")
164 $ map (\q -> Many [ Plain "plainto_tsquery("
165 , Escape (cs q)
166 , Plain ")"
167 ]
168 ) xs
169
170 data Order = Asc | Desc
171
172 instance ToField Order
173 where
174 toField Asc = Plain "ASC"
175 toField Desc = Plain "DESC"
176
177 -- TODO
178 -- FIX fav
179 -- ADD ngrams count
180 -- TESTS
181 textSearchQuery :: Query
182 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
183 \ , n.hyperdata->'title' \
184 \ , n.hyperdata->'source' \
185 \ , n.hyperdata->'authors' \
186 \ , COALESCE(nn.score,null) \
187 \ FROM nodes n \
188 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
189 \ WHERE \
190 \ n.search @@ (?::tsquery) \
191 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
192 \ AND n.typename = ? \
193 \ ORDER BY n.hyperdata -> 'publication_date' ? \
194 \ offset ? limit ?;"
195
196 -- | Text Search Function for Master Corpus
197 -- TODO : text search for user corpus
198 -- Example:
199 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
200 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
201 textSearch :: Connection
202 -> TSQuery -> ParentId
203 -> Limit -> Offset -> Order
204 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
205 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
206 where
207 typeId = nodeTypeId NodeDocument
208
209