]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[Database][Schema][Ngrams]
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE Arrows #-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Database.TextSearch where
18
19
20 import Data.Aeson
21 import Data.List (intersperse)
22 import Data.String (IsString(..))
23 import Data.Text (Text, words, unpack)
24 import Database.PostgreSQL.Simple
25 import Database.PostgreSQL.Simple.ToField
26 import Gargantext.Database.Config (nodeTypeId)
27 import Gargantext.Database.Types.Node (NodeType(..))
28 import Gargantext.Prelude
29 import Gargantext.Database.Facet
30 import Gargantext.Database.Node
31 import Gargantext.Database.Schema.Ngrams
32 import Gargantext.Database.NodeNode
33 import Gargantext.Database.NodeNgram
34 import Gargantext.Database.Queries
35 import Gargantext.Core.Types
36 import Control.Arrow (returnA)
37 import qualified Opaleye as O hiding (Order)
38 import Opaleye hiding (Query, Order)
39
40 newtype TSQuery = UnsafeTSQuery [Text]
41
42 globalTextSearch :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
43 globalTextSearch c p t = runQuery c (globalTextSearchQuery p t)
44
45 -- | Global search query where ParentId is Master Node Corpus Id
46 globalTextSearchQuery :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
47 globalTextSearchQuery _ q = proc () -> do
48 row <- queryNodeTable -< ()
49 restrict -< (_node_search row) @@ (pgTSQuery (unpack q))
50 restrict -< (_node_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
51 returnA -< (_node_id row, _node_hyperdata row)
52
53 ------------------------------------------------------------------------
54 {-
55 graphCorpusAuthorQuery :: O.Query (NodeRead, (NodeNgramRead, (NgramsReadNull, NodeNgramReadNull)))
56 graphCorpusAuthorQuery = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNgramTable queryNodeTable cond12 cond23 cond34
57 where
58 --cond12 :: (NgramsRead, NodeNgramRead) -> Column PGBool
59 cond12 = undefined
60
61 cond23 :: (NodeNgramRead, (NodeNgramRead, NodeNgramReadNull)) -> Column PGBool
62 cond23 = undefined
63
64 cond34 :: (NodeRead, (NodeNgramRead, (NodeReadNull, NodeNgramReadNull))) -> Column PGBool
65 cond34 = undefined
66 --}
67 --runGraphCorpusDocSearch :: Connection -> CorpusId -> Text -> IO [(Column PGInt4, Column PGJsonb)]
68 --runGraphCorpusDocSearch c cId t = runQuery c $ graphCorpusDocSearch cId t
69
70
71 -- | todo add limit and offset and order
72 graphCorpusDocSearch :: CorpusId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
73 graphCorpusDocSearch cId t = proc () -> do
74 (n, nn) <- graphCorpusDocSearchQuery -< ()
75 restrict -< (_node_search n) @@ (pgTSQuery (unpack t))
76 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
77 restrict -< (_node_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
78 returnA -< (_node_id n, _node_hyperdata n)
79
80 graphCorpusDocSearchQuery :: O.Query (NodeRead, NodeNodeReadNull)
81 graphCorpusDocSearchQuery = leftJoin queryNodeTable queryNodeNodeTable cond
82 where
83 cond :: (NodeRead, NodeNodeRead) -> Column PGBool
84 cond (n, nn) = nodeNode_node1_id nn .== _node_id n
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101 -- | TODO [""] -> panic "error"
102 toTSQuery :: [Text] -> TSQuery
103 toTSQuery txt = UnsafeTSQuery txt
104
105
106 instance IsString TSQuery
107 where
108 fromString = UnsafeTSQuery . words . cs
109
110
111 instance ToField TSQuery
112 where
113 toField (UnsafeTSQuery xs)
114 = Many $ intersperse (Plain " && ")
115 $ map (\q -> Many [ Plain "plainto_tsquery("
116 , Escape (cs q)
117 , Plain ")"
118 ]
119 ) xs
120
121 data Order = Asc | Desc
122
123 instance ToField Order
124 where
125 toField Asc = Plain "ASC"
126 toField Desc = Plain "DESC"
127
128 -- TODO
129 -- FIX fav
130 -- ADD ngrams count
131 -- TESTS
132 textSearchQuery :: Query
133 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
134 \ , n.hyperdata->'title' \
135 \ , n.hyperdata->'source' \
136 \ , n.hyperdata->'authors' \
137 \ , COALESCE(nn.score,null) \
138 \ FROM nodes n \
139 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
140 \ WHERE \
141 \ n.search @@ (?::tsquery) \
142 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
143 \ AND n.typename = ? \
144 \ ORDER BY n.hyperdata -> 'publication_date' ? \
145 \ offset ? limit ?;"
146
147 -- | Text Search Function for Master Corpus
148 -- TODO : text search for user corpus
149 -- Example:
150 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
151 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
152 textSearch :: Connection
153 -> TSQuery -> ParentId
154 -> Limit -> Offset -> Order
155 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
156 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
157 where
158 typeId = nodeTypeId NodeDocument
159
160