]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[VIZ/CHARTS] Histogram by year.
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15
16 module Gargantext.Database.TextSearch where
17
18 import Data.Aeson
19 import Data.Map.Strict hiding (map, drop, take)
20 import Data.Maybe
21 import Data.List (intersperse, take, drop)
22 import Data.String (IsString(..))
23 import Data.Text (Text, words, unpack, intercalate)
24 import Data.Time (UTCTime)
25 import Database.PostgreSQL.Simple (Query)
26 import Database.PostgreSQL.Simple.ToField
27 import Gargantext.Database.Config (nodeTypeId)
28 import Gargantext.Database.Types.Node (NodeType(..))
29 import Gargantext.Prelude
30 --import Gargantext.Database.Node.Contact
31 import Gargantext.Database.Facet
32 import Gargantext.Database.Schema.Node
33 import Gargantext.Database.Schema.Ngrams
34 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
35 import Gargantext.Database.Schema.NodeNgram
36 import Gargantext.Database.Queries.Join (leftJoin6)
37 import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery)
38 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
39 import Gargantext.Core.Types
40 import Control.Arrow (returnA)
41 import qualified Opaleye as O hiding (Order)
42 import Opaleye hiding (Query, Order)
43
44
45 ------------------------------------------------------------------------
46 searchInDatabase :: ParentId -> Text -> Cmd err [(NodeId, HyperdataDocument)]
47 searchInDatabase p t = runOpaQuery (queryInDatabase p t)
48
49 -- | Global search query where ParentId is Master Node Corpus Id
50 queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
51 queryInDatabase _ q = proc () -> do
52 row <- queryNodeSearchTable -< ()
53 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
54 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
55 returnA -< (_ns_id row, _ns_hyperdata row)
56
57 ------------------------------------------------------------------------
58 -- | todo add limit and offset and order
59 searchInCorpus :: CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
60 searchInCorpus cId q o l order = runOpaQuery (filterWith o l order $ queryInCorpus cId q')
61 where
62 q' = intercalate " | " $ map stemIt q
63
64 queryInCorpus :: CorpusId -> Text -> O.Query FacetDocRead
65 queryInCorpus cId q = proc () -> do
66 (n, nn) <- joinInCorpus -< ()
67 restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId)
68 restrict -< ( nn_delete nn) .== (toNullable $ pgBool False)
69 restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
70 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
71 returnA -< FacetDoc (_ns_id n) (_ns_date n) (_ns_name n) (_ns_hyperdata n) (pgBool True) (pgInt4 1)
72
73 joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
74 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
75 where
76 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
77 cond (n, nn) = nn_node2_id nn .== _ns_id n
78
79 ------------------------------------------------------------------------
80 type AuthorName = Text
81
82 -- | TODO Optim: Offset and Limit in the Query
83 searchInCorpusWithContacts :: CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
84 searchInCorpusWithContacts cId q o l order = take (maybe 5 identity l) <$> drop (maybe 0 identity o)
85 <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s (catMaybes ps))
86 <$> toList <$> fromListWith (<>)
87 <$> map (\(FacetPaired i u h s p) -> ((i,u,h,s), [maybePair p]))
88 <$> searchInCorpusWithContacts' cId q o l order
89 where
90 maybePair (Pair Nothing Nothing) = Nothing
91 maybePair (Pair _ Nothing) = Nothing
92 maybePair (Pair Nothing _) = Nothing
93 maybePair (Pair (Just p_id) (Just p_label)) = Just $ Pair p_id p_label
94
95 searchInCorpusWithContacts' :: CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Pair (Maybe Int) (Maybe Text)))]
96 searchInCorpusWithContacts' cId q o l order = runOpaQuery $ queryInCorpusWithContacts cId q' o l order
97 where
98 q' = intercalate " | " $ map stemIt q
99
100
101
102 queryInCorpusWithContacts :: CorpusId -> Text -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> O.Query FacetPairedRead
103 queryInCorpusWithContacts cId q _ _ _ = proc () -> do
104 (docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
105 restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
106 restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
107 restrict -< (nn_node1_id corpusDoc) .== (toNullable $ pgNodeId cId)
108 restrict -< (nng_listType docNgrams) .== (toNullable $ pgNgramsType Authors)
109 restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
110 -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
111 returnA -< FacetPaired (_ns_id docs) (_ns_date docs) (_ns_hyperdata docs) (pgInt4 0) (Pair (_node_id contacts) (ngrams_terms ngrams'))
112
113 joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
114 joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
115 where
116 cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
117 cond12 (ng3, n2) = _node_id n2 .== nng_node_id ng3
118 ---------
119 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
120 cond23 (ng2, (nng2, _)) = nng_ngrams_id nng2 .== ngrams_id ng2
121
122 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
123 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nng_ngrams_id nng
124
125 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
126 cond45 (nn, (nng, (_,(_,_)))) = nng_node_id nng .== nn_node2_id nn
127
128 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
129 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn_node2_id nn
130
131
132 {-
133 queryGraphCorpusAuthors' :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, NgramsReadNull)))
134 queryGraphCorpusAuthors' = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34
135 where
136 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
137 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
138
139 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
140 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
141
142 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
143 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
144
145 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
146 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
147 -}
148
149
150
151 newtype TSQuery = UnsafeTSQuery [Text]
152
153 -- | TODO [""] -> panic "error"
154 toTSQuery :: [Text] -> TSQuery
155 toTSQuery txt = UnsafeTSQuery $ map stemIt txt
156
157
158 instance IsString TSQuery
159 where
160 fromString = UnsafeTSQuery . words . cs
161
162
163 instance ToField TSQuery
164 where
165 toField (UnsafeTSQuery xs)
166 = Many $ intersperse (Plain " && ")
167 $ map (\q -> Many [ Plain "plainto_tsquery("
168 , Escape (cs q)
169 , Plain ")"
170 ]
171 ) xs
172
173 data Order = Asc | Desc
174
175 instance ToField Order
176 where
177 toField Asc = Plain "ASC"
178 toField Desc = Plain "DESC"
179
180 -- TODO
181 -- FIX fav
182 -- ADD ngrams count
183 -- TESTS
184 textSearchQuery :: Query
185 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
186 \ , n.hyperdata->'title' \
187 \ , n.hyperdata->'source' \
188 \ , n.hyperdata->'authors' \
189 \ , COALESCE(nn.score,null) \
190 \ FROM nodes n \
191 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
192 \ WHERE \
193 \ n.search @@ (?::tsquery) \
194 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
195 \ AND n.typename = ? \
196 \ ORDER BY n.hyperdata -> 'publication_date' ? \
197 \ offset ? limit ?;"
198
199 -- | Text Search Function for Master Corpus
200 -- TODO : text search for user corpus
201 -- Example:
202 -- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
203 -- textSearchTest pId q = textSearch q pId 5 0 Asc
204 textSearch :: TSQuery -> ParentId
205 -> Limit -> Offset -> Order
206 -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
207 textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
208 where
209 typeId = nodeTypeId NodeDocument
210
211