]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
Merge branch 'tsvector'
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14
15 module Gargantext.Database.TextSearch where
16
17 import Data.Aeson
18 import Data.List (intersperse)
19 import Data.String (IsString(..))
20 import Data.Text (Text, words, unpack)
21 import Database.PostgreSQL.Simple -- (Query, Connection)
22 import Database.PostgreSQL.Simple.ToField
23 import Gargantext.Database.Config (nodeTypeId)
24 import Gargantext.Database.Types.Node (NodeType(..))
25 import Gargantext.Prelude
26 import Gargantext.Database.Schema.Node
27 import Gargantext.Database.Schema.NodeNode
28 import Gargantext.Core.Types
29 import Control.Arrow (returnA)
30 import qualified Opaleye as O hiding (Order)
31 import Opaleye hiding (Query, Order)
32
33 newtype TSQuery = UnsafeTSQuery [Text]
34
35 globalTextSearch :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
36 globalTextSearch c p t = runQuery c (globalTextSearchQuery p t)
37
38 -- | Global search query where ParentId is Master Node Corpus Id
39 globalTextSearchQuery :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
40 globalTextSearchQuery _ q = proc () -> do
41 row <- queryNodeSearchTable -< ()
42 restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
43 restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
44 returnA -< (_ns_id row, _ns_hyperdata row)
45
46 ------------------------------------------------------------------------
47 {-
48 graphCorpusAuthorQuery :: O.Query (NodeRead, (NodeNgramRead, (NgramsReadNull, NodeNgramReadNull)))
49 graphCorpusAuthorQuery = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNgramTable queryNodeTable cond12 cond23 cond34
50 where
51 --cond12 :: (NgramsRead, NodeNgramRead) -> Column PGBool
52 cond12 = undefined
53
54 cond23 :: (NodeNgramRead, (NodeNgramRead, NodeNgramReadNull)) -> Column PGBool
55 cond23 = undefined
56
57 cond34 :: (NodeRead, (NodeNgramRead, (NodeReadNull, NodeNgramReadNull))) -> Column PGBool
58 cond34 = undefined
59 --}
60 --runGraphCorpusDocSearch :: Connection -> CorpusId -> Text -> IO [(Column PGInt4, Column PGJsonb)]
61 --runGraphCorpusDocSearch c cId t = runQuery c $ graphCorpusDocSearch cId t
62
63
64 -- | todo add limit and offset and order
65 graphCorpusDocSearch :: CorpusId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
66 graphCorpusDocSearch cId t = proc () -> do
67 (n, nn) <- graphCorpusDocSearchQuery -< ()
68 restrict -< (_ns_search n) @@ (pgTSQuery (unpack t))
69 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
70 restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
71 returnA -< (_ns_id n, _ns_hyperdata n)
72
73 graphCorpusDocSearchQuery :: O.Query (NodeSearchRead, NodeNodeReadNull)
74 graphCorpusDocSearchQuery = leftJoin queryNodeSearchTable queryNodeNodeTable cond
75 where
76 cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
77 cond (n, nn) = nodeNode_node1_id nn .== _ns_id n
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94 -- | TODO [""] -> panic "error"
95 toTSQuery :: [Text] -> TSQuery
96 toTSQuery txt = UnsafeTSQuery txt
97
98
99 instance IsString TSQuery
100 where
101 fromString = UnsafeTSQuery . words . cs
102
103
104 instance ToField TSQuery
105 where
106 toField (UnsafeTSQuery xs)
107 = Many $ intersperse (Plain " && ")
108 $ map (\q -> Many [ Plain "plainto_tsquery("
109 , Escape (cs q)
110 , Plain ")"
111 ]
112 ) xs
113
114 data Order = Asc | Desc
115
116 instance ToField Order
117 where
118 toField Asc = Plain "ASC"
119 toField Desc = Plain "DESC"
120
121 -- TODO
122 -- FIX fav
123 -- ADD ngrams count
124 -- TESTS
125 textSearchQuery :: Query
126 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
127 \ , n.hyperdata->'title' \
128 \ , n.hyperdata->'source' \
129 \ , n.hyperdata->'authors' \
130 \ , COALESCE(nn.score,null) \
131 \ FROM nodes n \
132 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
133 \ WHERE \
134 \ n.search @@ (?::tsquery) \
135 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
136 \ AND n.typename = ? \
137 \ ORDER BY n.hyperdata -> 'publication_date' ? \
138 \ offset ? limit ?;"
139
140 -- | Text Search Function for Master Corpus
141 -- TODO : text search for user corpus
142 -- Example:
143 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
144 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
145 textSearch :: Connection
146 -> TSQuery -> ParentId
147 -> Limit -> Offset -> Order
148 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
149 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
150 where
151 typeId = nodeTypeId NodeDocument
152
153