]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
Merge branch 'dev' into 97-dev-istex-search
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Maybe
18 import Data.Text (Text, unpack, intercalate)
19 import Data.Time (UTCTime)
20 import Gargantext.Core
21 import Gargantext.Core.Types
22 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
23 import Gargantext.Database.Prelude (Cmd, runOpaQuery, runCountOpaQuery)
24 import Gargantext.Database.Query.Facet
25 import Gargantext.Database.Query.Filter
26 import Gargantext.Database.Query.Join (leftJoin5)
27 import Gargantext.Database.Query.Table.Node
28 import Gargantext.Database.Query.Table.Context
29 import Gargantext.Database.Query.Table.NodeNode
30 import Gargantext.Database.Query.Table.NodeContext
31 import Gargantext.Database.Schema.Node
32 import Gargantext.Database.Schema.Context
33 import Gargantext.Prelude
34 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
35 import Opaleye hiding (Order)
36 import Data.Profunctor.Product (p4)
37 import qualified Opaleye as O hiding (Order)
38
39 ------------------------------------------------------------------------
40 searchDocInDatabase :: HasDBid NodeType
41 => ParentId
42 -> Text
43 -> Cmd err [(NodeId, HyperdataDocument)]
44 searchDocInDatabase _p t = runOpaQuery (queryDocInDatabase t)
45 where
46 -- | Global search query where ParentId is Master Node Corpus Id
47 queryDocInDatabase :: Text -> O.Select (Column SqlInt4, Column SqlJsonb)
48 queryDocInDatabase q = proc () -> do
49 row <- queryNodeSearchTable -< ()
50 restrict -< (_ns_search row) @@ (sqlTSQuery (unpack q))
51 restrict -< (_ns_typename row) .== (sqlInt4 $ toDBid NodeDocument)
52 returnA -< (_ns_id row, _ns_hyperdata row)
53
54 ------------------------------------------------------------------------
55 -- | todo add limit and offset and order
56 searchInCorpus :: HasDBid NodeType
57 => CorpusId
58 -> IsTrash
59 -> [Text]
60 -> Maybe Offset
61 -> Maybe Limit
62 -> Maybe OrderBy
63 -> Cmd err [FacetDoc]
64 searchInCorpus cId t q o l order = runOpaQuery
65 $ filterWith o l order
66 $ queryInCorpus cId t
67 $ intercalate " | "
68 $ map stemIt q
69
70 searchCountInCorpus :: HasDBid NodeType
71 => CorpusId
72 -> IsTrash
73 -> [Text]
74 -> Cmd err Int
75 searchCountInCorpus cId t q = runCountOpaQuery
76 $ queryInCorpus cId t
77 $ intercalate " | "
78 $ map stemIt q
79
80 queryInCorpus :: HasDBid NodeType
81 => CorpusId
82 -> IsTrash
83 -> Text
84 -> O.Select FacetDocRead
85 queryInCorpus cId t q = proc () -> do
86 (c, nc) <- joinInCorpus -< ()
87 restrict -< (nc^.nc_node_id) .== (toNullable $ pgNodeId cId)
88 restrict -< if t
89 then (nc^.nc_category) .== (toNullable $ sqlInt4 0)
90 else (nc^.nc_category) .>= (toNullable $ sqlInt4 1)
91 restrict -< (c ^. cs_search) @@ (sqlTSQuery (unpack q))
92 restrict -< (c ^. cs_typename ) .== (sqlInt4 $ toDBid NodeDocument)
93 returnA -< FacetDoc { facetDoc_id = c^.cs_id
94 , facetDoc_created = c^.cs_date
95 , facetDoc_title = c^.cs_name
96 , facetDoc_hyperdata = c^.cs_hyperdata
97 , facetDoc_category = nc^.nc_category
98 , facetDoc_ngramCount = nc^.nc_score
99 , facetDoc_score = nc^.nc_score
100 }
101
102 joinInCorpus :: O.Select (ContextSearchRead, NodeContextReadNull)
103 joinInCorpus = leftJoin queryContextSearchTable queryNodeContextTable cond
104 where
105 cond :: (ContextSearchRead, NodeContextRead) -> Column SqlBool
106 cond (c, nc) = nc^.nc_context_id .== _cs_id c
107
108 ------------------------------------------------------------------------
109 searchInCorpusWithContacts
110 :: HasDBid NodeType
111 => CorpusId
112 -> AnnuaireId
113 -> [Text]
114 -> Maybe Offset
115 -> Maybe Limit
116 -> Maybe OrderBy
117 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
118 searchInCorpusWithContacts cId aId q o l _order =
119 runOpaQuery $ limit' l
120 $ offset' o
121 $ orderBy ( desc _fp_score)
122 $ selectGroup cId aId
123 $ intercalate " | "
124 $ map stemIt q
125
126 selectContactViaDoc
127 :: HasDBid NodeType
128 => CorpusId
129 -> AnnuaireId
130 -> Text
131 -> SelectArr ()
132 ( Column (Nullable SqlInt4)
133 , Column (Nullable SqlTimestamptz)
134 , Column (Nullable SqlJsonb)
135 , Column (Nullable SqlInt4)
136 )
137 selectContactViaDoc cId aId q = proc () -> do
138 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
139 restrict -< (doc^.ns_search) @@ (sqlTSQuery $ unpack q )
140 restrict -< (doc^.ns_typename) .== (sqlInt4 $ toDBid NodeDocument)
141 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
142 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
143 restrict -< (contact^.node_typename) .== (toNullable $ sqlInt4 $ toDBid NodeContact)
144 returnA -< ( contact^.node_id
145 , contact^.node_date
146 , contact^.node_hyperdata
147 , toNullable $ sqlInt4 1
148 )
149
150 selectGroup :: HasDBid NodeType
151 => NodeId
152 -> NodeId
153 -> Text
154 -> Select FacetPairedReadNull
155 selectGroup cId aId q = proc () -> do
156 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
157 (selectContactViaDoc cId aId q) -< ()
158 returnA -< FacetPaired a b c d
159
160
161 queryContactViaDoc :: O.Select ( NodeSearchRead
162 , ( NodeNodeReadNull
163 , ( NodeNodeReadNull
164 , ( NodeNodeReadNull
165 , NodeReadNull
166 )
167 )
168 )
169 )
170 queryContactViaDoc =
171 leftJoin5
172 queryNodeTable
173 queryNodeNodeTable
174 queryNodeNodeTable
175 queryNodeNodeTable
176 queryNodeSearchTable
177 cond12
178 cond23
179 cond34
180 cond45
181 where
182 cond12 :: (NodeNodeRead, NodeRead) -> Column SqlBool
183 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
184
185 cond23 :: ( NodeNodeRead
186 , ( NodeNodeRead
187 , NodeReadNull
188 )
189 ) -> Column SqlBool
190 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
191
192 cond34 :: ( NodeNodeRead
193 , ( NodeNodeRead
194 , ( NodeNodeReadNull
195 , NodeReadNull
196 )
197 )
198 ) -> Column SqlBool
199 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
200
201
202 cond45 :: ( NodeSearchRead
203 , ( NodeNodeRead
204 , ( NodeNodeReadNull
205 , ( NodeNodeReadNull
206 , NodeReadNull
207 )
208 )
209 )
210 ) -> Column SqlBool
211 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
212
213
214 ------------------------------------------------------------------------