]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[Query] clean.
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14
15 module Gargantext.Database.TextSearch where
16
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.String (IsString(..))
20 import Data.Text (Text, words, unpack)
21 import Database.PostgreSQL.Simple -- (Query, Connection)
22 import Database.PostgreSQL.Simple.ToField
23 import Gargantext.Database.Config (nodeTypeId)
24 import Gargantext.Database.Types.Node (NodeType(..))
25 import Gargantext.Prelude
26 import Gargantext.Database.Node.Contact
27 import Gargantext.Database.Schema.Node
28 import Gargantext.Database.Schema.Ngrams
29 import Gargantext.Database.Schema.NodeNode
30 import Gargantext.Database.Schema.NodeNgram
31 import Gargantext.Database.Queries.Join (leftJoin6, leftJoin3')
32 import Gargantext.Core.Types
33 import Control.Arrow (returnA)
34 import qualified Opaleye as O hiding (Order)
35 import Opaleye hiding (Query, Order)
36
37 newtype TSQuery = UnsafeTSQuery [Text]
38
39 globalTextSearch :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
40 globalTextSearch c p t = runQuery c (globalTextSearchQuery p t)
41
42 -- | Global search query where ParentId is Master Node Corpus Id
43 globalTextSearchQuery :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
44 globalTextSearchQuery _ q = proc () -> do
45 row <- queryNodeSearchTable -< ()
46 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
47 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
48 returnA -< (_ns_id row, _ns_hyperdata row)
49
50 ------------------------------------------------------------------------
51 -- | todo add limit and offset and order
52 graphCorpusDocSearch :: CorpusId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
53 graphCorpusDocSearch cId q = proc () -> do
54 (n, nn) <- graphCorpusDocSearchQuery -< ()
55 restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
56 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
57 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
58 returnA -< (_ns_id n, _ns_hyperdata n)
59
60 graphCorpusDocSearchQuery :: O.Query (NodeSearchRead, NodeNodeReadNull)
61 graphCorpusDocSearchQuery = leftJoin queryNodeSearchTable queryNodeNodeTable cond
62 where
63 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
64 cond (n, nn) = nodeNode_node1_id nn .== _ns_id n
65
66
67 getGraphCorpusAuthors :: Connection -> CorpusId -> Text -> IO [((Int, HyperdataDocument),(Maybe Int, Maybe HyperdataContact))]
68 getGraphCorpusAuthors c cId q = runQuery c $ selectGraphCorpusAuthors cId q
69
70 selectGraphCorpusAuthors :: CorpusId -> Text -> O.Query ((Column PGInt4, Column PGJsonb),(Column (Nullable PGInt4), Column (Nullable PGJsonb)))
71 selectGraphCorpusAuthors cId q = proc () -> do
72 (docs, (corpusDoc, (docNgrams, (ngrams, (ngramsContact, contacts))))) <- queryGraphCorpusAuthors -< ()
73 restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
74 restrict -< (nodeNode_node1_id corpusDoc) .== (toNullable $ pgInt4 cId)
75 restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
76 restrict -< (nodeNgram_type docNgrams) .== (toNullable $ pgInt4 $ ngramsTypeId Authors)
77 restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
78 returnA -< ((_ns_id docs, _ns_hyperdata docs), (_node_id contacts, _node_hyperdata contacts))
79
80
81 queryGraphCorpusAuthors :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
82 queryGraphCorpusAuthors = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
83 where
84 cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
85 cond12 (ng3, n2) = _node_id n2 .== nodeNgram_node_id ng3
86 ---------
87 cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
88 cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
89
90 cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
91 cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
92
93 cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
94 cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
95
96 cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
97 cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
98
99
100
101
102
103
104
105
106
107 -- | TODO [""] -> panic "error"
108 toTSQuery :: [Text] -> TSQuery
109 toTSQuery txt = UnsafeTSQuery txt
110
111
112 instance IsString TSQuery
113 where
114 fromString = UnsafeTSQuery . words . cs
115
116
117 instance ToField TSQuery
118 where
119 toField (UnsafeTSQuery xs)
120 = Many $ intersperse (Plain " && ")
121 $ map (\q -> Many [ Plain "plainto_tsquery("
122 , Escape (cs q)
123 , Plain ")"
124 ]
125 ) xs
126
127 data Order = Asc | Desc
128
129 instance ToField Order
130 where
131 toField Asc = Plain "ASC"
132 toField Desc = Plain "DESC"
133
134 -- TODO
135 -- FIX fav
136 -- ADD ngrams count
137 -- TESTS
138 textSearchQuery :: Query
139 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
140 \ , n.hyperdata->'title' \
141 \ , n.hyperdata->'source' \
142 \ , n.hyperdata->'authors' \
143 \ , COALESCE(nn.score,null) \
144 \ FROM nodes n \
145 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
146 \ WHERE \
147 \ n.search @@ (?::tsquery) \
148 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
149 \ AND n.typename = ? \
150 \ ORDER BY n.hyperdata -> 'publication_date' ? \
151 \ offset ? limit ?;"
152
153 -- | Text Search Function for Master Corpus
154 -- TODO : text search for user corpus
155 -- Example:
156 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
157 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
158 textSearch :: Connection
159 -> TSQuery -> ParentId
160 -> Limit -> Offset -> Order
161 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
162 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
163 where
164 typeId = nodeTypeId NodeDocument
165
166