]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/TextSearch.hs
[COMPIL] ok
[gargantext.git] / src / Gargantext / Database / TextSearch.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE Arrows #-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Database.TextSearch where
18
19 import Data.Aeson
20 import Data.List (intersperse)
21 import Data.String (IsString(..))
22 import Data.Text (Text, words, unpack)
23 import Database.PostgreSQL.Simple -- (Query, Connection)
24 import Database.PostgreSQL.Simple.ToField
25 import Gargantext.Database.Config (nodeTypeId)
26 import Gargantext.Database.Types.Node (NodeType(..))
27 import Gargantext.Prelude
28 import Gargantext.Database.Schema.Node
29 import Gargantext.Database.Schema.NodeNode
30 import Gargantext.Database.Queries
31 import Gargantext.Core.Types
32 import Control.Arrow (returnA)
33 import qualified Opaleye as O hiding (Order)
34 import Opaleye hiding (Query, Order)
35
36 newtype TSQuery = UnsafeTSQuery [Text]
37
38 globalTextSearch :: Connection -> ParentId -> Text -> IO [(NodeId, HyperdataDocument)]
39 globalTextSearch c p t = runQuery c (globalTextSearchQuery p t)
40
41 -- | Global search query where ParentId is Master Node Corpus Id
42 globalTextSearchQuery :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
43 globalTextSearchQuery _ q = proc () -> do
44 row <- queryNodeTable -< ()
45 restrict -< (_node_search row) @@ (pgTSQuery (unpack q))
46 restrict -< (_node_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
47 returnA -< (_node_id row, _node_hyperdata row)
48
49 ------------------------------------------------------------------------
50 {-
51 graphCorpusAuthorQuery :: O.Query (NodeRead, (NodeNgramRead, (NgramsReadNull, NodeNgramReadNull)))
52 graphCorpusAuthorQuery = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNgramTable queryNodeTable cond12 cond23 cond34
53 where
54 --cond12 :: (NgramsRead, NodeNgramRead) -> Column PGBool
55 cond12 = undefined
56
57 cond23 :: (NodeNgramRead, (NodeNgramRead, NodeNgramReadNull)) -> Column PGBool
58 cond23 = undefined
59
60 cond34 :: (NodeRead, (NodeNgramRead, (NodeReadNull, NodeNgramReadNull))) -> Column PGBool
61 cond34 = undefined
62 --}
63 --runGraphCorpusDocSearch :: Connection -> CorpusId -> Text -> IO [(Column PGInt4, Column PGJsonb)]
64 --runGraphCorpusDocSearch c cId t = runQuery c $ graphCorpusDocSearch cId t
65
66
67 -- | todo add limit and offset and order
68 graphCorpusDocSearch :: CorpusId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
69 graphCorpusDocSearch cId t = proc () -> do
70 (n, nn) <- graphCorpusDocSearchQuery -< ()
71 restrict -< (_node_search n) @@ (pgTSQuery (unpack t))
72 restrict -< ( nodeNode_node1_id nn) .== (toNullable $ pgInt4 cId)
73 restrict -< (_node_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
74 returnA -< (_node_id n, _node_hyperdata n)
75
76 graphCorpusDocSearchQuery :: O.Query (NodeRead, NodeNodeReadNull)
77 graphCorpusDocSearchQuery = leftJoin queryNodeTable queryNodeNodeTable cond
78 where
79 cond :: (NodeRead, NodeNodeRead) -> Column PGBool
80 cond (n, nn) = nodeNode_node1_id nn .== _node_id n
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97 -- | TODO [""] -> panic "error"
98 toTSQuery :: [Text] -> TSQuery
99 toTSQuery txt = UnsafeTSQuery txt
100
101
102 instance IsString TSQuery
103 where
104 fromString = UnsafeTSQuery . words . cs
105
106
107 instance ToField TSQuery
108 where
109 toField (UnsafeTSQuery xs)
110 = Many $ intersperse (Plain " && ")
111 $ map (\q -> Many [ Plain "plainto_tsquery("
112 , Escape (cs q)
113 , Plain ")"
114 ]
115 ) xs
116
117 data Order = Asc | Desc
118
119 instance ToField Order
120 where
121 toField Asc = Plain "ASC"
122 toField Desc = Plain "DESC"
123
124 -- TODO
125 -- FIX fav
126 -- ADD ngrams count
127 -- TESTS
128 textSearchQuery :: Query
129 textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
130 \ , n.hyperdata->'title' \
131 \ , n.hyperdata->'source' \
132 \ , n.hyperdata->'authors' \
133 \ , COALESCE(nn.score,null) \
134 \ FROM nodes n \
135 \ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
136 \ WHERE \
137 \ n.search @@ (?::tsquery) \
138 \ AND (n.parent_id = ? OR nn.node1_id = ?) \
139 \ AND n.typename = ? \
140 \ ORDER BY n.hyperdata -> 'publication_date' ? \
141 \ offset ? limit ?;"
142
143 -- | Text Search Function for Master Corpus
144 -- TODO : text search for user corpus
145 -- Example:
146 -- textSearchTest :: ParentId -> TSQuery -> Cmd [(Int, Value, Value, Value, Value, Maybe Int)]
147 -- textSearchTest pId q = mkCmd $ \c -> textSearch c q pId 5 0 Asc
148 textSearch :: Connection
149 -> TSQuery -> ParentId
150 -> Limit -> Offset -> Order
151 -> IO [(Int,Value,Value,Value, Value, Maybe Int)]
152 textSearch conn q p l o ord = query conn textSearchQuery (q,p,p,typeId,ord,o,l)
153 where
154 typeId = nodeTypeId NodeDocument
155
156