]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Search.hs
[john-snow] implement pos/lemma language
[gargantext.git] / src / Gargantext / Database / Action / Search.hs
1 {-|
2 Module : Gargantext.Database.TextSearch
3 Description : Postgres text search experimentation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE Arrows #-}
12
13 module Gargantext.Database.Action.Search where
14
15 import Control.Arrow (returnA)
16 import Control.Lens ((^.))
17 import Data.Maybe
18 import Data.Text (Text, unpack, intercalate)
19 import Data.Time (UTCTime)
20 import Gargantext.Core
21 import Gargantext.Core.Types
22 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
23 import Gargantext.Database.Prelude (Cmd, runOpaQuery, runCountOpaQuery)
24 import Gargantext.Database.Query.Facet
25 import Gargantext.Database.Query.Filter
26 import Gargantext.Database.Query.Join (leftJoin5)
27 import Gargantext.Database.Query.Table.Node
28 import Gargantext.Database.Query.Table.NodeNode
29 import Gargantext.Database.Schema.Node
30 import Gargantext.Prelude
31 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
32 import Opaleye hiding (Order)
33 import Data.Profunctor.Product (p4)
34 import qualified Opaleye as O hiding (Order)
35
36 ------------------------------------------------------------------------
37 searchDocInDatabase :: HasDBid NodeType
38 => ParentId
39 -> Text
40 -> Cmd err [(NodeId, HyperdataDocument)]
41 searchDocInDatabase _p t = runOpaQuery (queryDocInDatabase t)
42 where
43 -- | Global search query where ParentId is Master Node Corpus Id
44 queryDocInDatabase :: Text -> O.Select (Column SqlInt4, Column SqlJsonb)
45 queryDocInDatabase q = proc () -> do
46 row <- queryNodeSearchTable -< ()
47 restrict -< (_ns_search row) @@ (sqlTSQuery (unpack q))
48 restrict -< (_ns_typename row) .== (sqlInt4 $ toDBid NodeDocument)
49 returnA -< (_ns_id row, _ns_hyperdata row)
50
51 ------------------------------------------------------------------------
52 -- | todo add limit and offset and order
53 searchInCorpus :: HasDBid NodeType
54 => CorpusId
55 -> IsTrash
56 -> [Text]
57 -> Maybe Offset
58 -> Maybe Limit
59 -> Maybe OrderBy
60 -> Cmd err [FacetDoc]
61 searchInCorpus cId t q o l order = runOpaQuery
62 $ filterWith o l order
63 $ queryInCorpus cId t
64 $ intercalate " | "
65 $ map stemIt q
66
67 searchCountInCorpus :: HasDBid NodeType
68 => CorpusId
69 -> IsTrash
70 -> [Text]
71 -> Cmd err Int
72 searchCountInCorpus cId t q = runCountOpaQuery
73 $ queryInCorpus cId t
74 $ intercalate " | "
75 $ map stemIt q
76
77 queryInCorpus :: HasDBid NodeType
78 => CorpusId
79 -> IsTrash
80 -> Text
81 -> O.Select FacetDocRead
82 queryInCorpus cId t q = proc () -> do
83 (n, nn) <- joinInCorpus -< ()
84 restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
85 restrict -< if t
86 then (nn^.nn_category) .== (toNullable $ sqlInt4 0)
87 else (nn^.nn_category) .>= (toNullable $ sqlInt4 1)
88 restrict -< (n ^. ns_search) @@ (sqlTSQuery (unpack q))
89 restrict -< (n ^. ns_typename ) .== (sqlInt4 $ toDBid NodeDocument)
90 returnA -< FacetDoc { facetDoc_id = n^.ns_id
91 , facetDoc_created = n^.ns_date
92 , facetDoc_title = n^.ns_name
93 , facetDoc_hyperdata = n^.ns_hyperdata
94 , facetDoc_category = nn^.nn_category
95 , facetDoc_ngramCount = nn^.nn_score
96 , facetDoc_score = nn^.nn_score
97 }
98
99 joinInCorpus :: O.Select (NodeSearchRead, NodeNodeReadNull)
100 joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
101 where
102 cond :: (NodeSearchRead, NodeNodeRead) -> Column SqlBool
103 cond (n, nn) = nn^.nn_node2_id .== _ns_id n
104
105 ------------------------------------------------------------------------
106 searchInCorpusWithContacts
107 :: HasDBid NodeType
108 => CorpusId
109 -> AnnuaireId
110 -> [Text]
111 -> Maybe Offset
112 -> Maybe Limit
113 -> Maybe OrderBy
114 -> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
115 searchInCorpusWithContacts cId aId q o l _order =
116 runOpaQuery $ limit' l
117 $ offset' o
118 $ orderBy ( desc _fp_score)
119 $ selectGroup cId aId
120 $ intercalate " | "
121 $ map stemIt q
122
123 selectContactViaDoc
124 :: HasDBid NodeType
125 => CorpusId
126 -> AnnuaireId
127 -> Text
128 -> SelectArr ()
129 ( Column (Nullable SqlInt4)
130 , Column (Nullable SqlTimestamptz)
131 , Column (Nullable SqlJsonb)
132 , Column (Nullable SqlInt4)
133 )
134 selectContactViaDoc cId aId q = proc () -> do
135 (doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
136 restrict -< (doc^.ns_search) @@ (sqlTSQuery $ unpack q )
137 restrict -< (doc^.ns_typename) .== (sqlInt4 $ toDBid NodeDocument)
138 restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
139 restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
140 restrict -< (contact^.node_typename) .== (toNullable $ sqlInt4 $ toDBid NodeContact)
141 returnA -< ( contact^.node_id
142 , contact^.node_date
143 , contact^.node_hyperdata
144 , toNullable $ sqlInt4 1
145 )
146
147 selectGroup :: HasDBid NodeType
148 => NodeId
149 -> NodeId
150 -> Text
151 -> Select FacetPairedReadNull
152 selectGroup cId aId q = proc () -> do
153 (a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
154 (selectContactViaDoc cId aId q) -< ()
155 returnA -< FacetPaired a b c d
156
157
158 queryContactViaDoc :: O.Select ( NodeSearchRead
159 , ( NodeNodeReadNull
160 , ( NodeNodeReadNull
161 , ( NodeNodeReadNull
162 , NodeReadNull
163 )
164 )
165 )
166 )
167 queryContactViaDoc =
168 leftJoin5
169 queryNodeTable
170 queryNodeNodeTable
171 queryNodeNodeTable
172 queryNodeNodeTable
173 queryNodeSearchTable
174 cond12
175 cond23
176 cond34
177 cond45
178 where
179 cond12 :: (NodeNodeRead, NodeRead) -> Column SqlBool
180 cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
181
182 cond23 :: ( NodeNodeRead
183 , ( NodeNodeRead
184 , NodeReadNull
185 )
186 ) -> Column SqlBool
187 cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
188
189 cond34 :: ( NodeNodeRead
190 , ( NodeNodeRead
191 , ( NodeNodeReadNull
192 , NodeReadNull
193 )
194 )
195 ) -> Column SqlBool
196 cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
197
198
199 cond45 :: ( NodeSearchRead
200 , ( NodeNodeRead
201 , ( NodeNodeReadNull
202 , ( NodeNodeReadNull
203 , NodeReadNull
204 )
205 )
206 )
207 ) -> Column SqlBool
208 cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
209
210
211 ------------------------------------------------------------------------