]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Metrics/Count.hs
[FIX] rdf lib.
[gargantext.git] / src / Gargantext / Database / Metrics / Count.hs
1 {-|
2 Module : Gargantext.Database.Metrics.Count
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Count Ngrams by Context
11
12 -}
13
14 {-# LANGUAGE QuasiQuotes #-}
15 {-# LANGUAGE Arrows #-}
16 {-# LANGUAGE NoImplicitPrelude #-}
17 {-# LANGUAGE OverloadedStrings #-}
18 {-# LANGUAGE QuasiQuotes #-}
19 {-# LANGUAGE RankNTypes #-}
20
21 module Gargantext.Database.Metrics.Count where
22
23 import Control.Arrow (returnA)
24 import Control.Lens (view)
25 import Data.Map.Strict (Map, fromListWith, elems)
26 import Data.Monoid (mempty)
27 import Data.Text (Text)
28 import Database.PostgreSQL.Simple.SqlQQ (sql)
29 import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement)
30 import Gargantext.Core.Types.Main (listTypeId, ListType(..))
31 import Gargantext.Database.Access
32 import Gargantext.Database.Config (nodeTypeId)
33 import Gargantext.Database.Queries.Join (leftJoin4, leftJoin5, leftJoin3)
34 import Gargantext.Database.Schema.Ngrams
35 import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsType(..), ngramsTypeId, Ngrams(..), NgramsIndexed(..), ngrams, ngramsTerms, fromNgramsTypeId)
36 import Gargantext.Database.Schema.Node
37 import Gargantext.Database.Schema.Node (HasNodeError(..))
38 import Gargantext.Database.Schema.NodeNgram
39 import Gargantext.Database.Schema.NodeNode
40 import Gargantext.Database.Schema.NodeNodeNgrams
41 import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
42 import Gargantext.Database.Utils
43 import Gargantext.Database.Utils (Cmd, runPGSQuery)
44 import Gargantext.Prelude hiding (sum)
45 import Gargantext.Text.Metrics.Count (Coocs, coocOn)
46 import Opaleye
47 import Safe (headMay)
48 import qualified Database.PostgreSQL.Simple as PGS
49
50 getCoocByDocDev :: HasNodeError err => CorpusId -> ListId -> Cmd err (Map ([Text], [Text]) Int)
51 getCoocByDocDev cId lId = coocOn (\n-> [ view ( ngrams . ngramsTerms) n]) <$> getNgramsByDoc cId lId
52
53 getCoocByDoc :: CorpusId -> ListId -> Cmd err (Map (NgramsIndexed, NgramsIndexed) Coocs)
54 getCoocByDoc cId lId = coocOn identity <$> getNgramsByDoc cId lId
55
56
57 getNgramsByDoc :: CorpusId -> ListId -> Cmd err [[NgramsIndexed]]
58 getNgramsByDoc cId lId =
59 elems
60 <$> fromListWith (<>)
61 <$> map (\(nId, ngId, nt, n) -> (nId, [NgramsIndexed (Ngrams nt n) ngId]))
62 <$> getNgramsByDocDb cId lId
63
64
65 getNgramsByDocDb :: CorpusId -> ListId -> Cmd err [(NodeId, NgramsId, Text, Int)]
66 getNgramsByDocDb cId lId = runPGSQuery query params
67 where
68 params = (cId, lId, listTypeId GraphTerm, ngramsTypeId NgramsTerms)
69 query = [sql|
70
71 -- TODO add CTE
72 SELECT n.id, ng.id, ng.terms, ng.n -- , list.parent_id
73 FROM nodes n
74 JOIN nodes_nodes nn ON nn.node2_id = n.id
75 JOIN nodes_ngrams nng ON nng.node_id = nn.node2_id
76 JOIN nodes_ngrams list ON list.ngrams_id = nng.ngrams_id
77 JOIN ngrams ng ON ng.id = nng.ngrams_id
78 WHERE nn.node1_id = ? -- CorpusId
79 AND list.node_id = ? -- ListId
80 AND list.list_type = ? -- GraphListId
81 AND list.ngrams_type = ? -- NgramsTypeId
82
83 |]
84
85
86 getNgramsByNode :: NodeId -> NgramsType -> Cmd err [[Text]]
87 getNgramsByNode nId nt = elems
88 <$> fromListWith (<>)
89 <$> map (\(i,t) -> (i,[t]))
90 <$> getNgramsByNodeNodeIndexed nId nt
91
92 -- | TODO add join with nodeNodeNgram (if it exists)
93 getNgramsByNodeNodeIndexed :: NodeId -> NgramsType -> Cmd err [(NodeId, Text)]
94 getNgramsByNodeNodeIndexed nId nt = runOpaQuery (select' nId)
95 where
96 select' nId' = proc () -> do
97 (ng,(nng,(nn,n))) <- getNgramsByNodeNodeIndexedJoin -< ()
98 restrict -< _node_id n .== toNullable (pgNodeId nId')
99 restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt)
100 restrict -< nn_delete nn ./= (toNullable . pgBool) True
101 returnA -< (nng_node_id nng, ngrams_terms ng)
102
103
104 {-
105 getNgramsByNodeIndexed' :: NodeId -> NgramsType -> Cmd err [(NodeId, Maybe Text)]
106 getNgramsByNodeIndexed' nId nt = runOpaQuery (select' nId)
107 where
108 select' nId' = proc () -> do
109 (nnng,(ng,(nng,(_,n)))) <- getNgramsByNodeIndexedJoin5 -< ()
110 restrict -< _node_id n .== toNullable (pgNodeId nId')
111 restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt)
112
113 let node_id' = ifThenElse (isNull $ toNullable $ nnng_node1_id nnng)
114 (nng_node_id nng)
115 (nnng_node2_id nng)
116 let t1 = ifThenElse (isNull $ toNullable $ nnng_node1_id nnng)
117 (ngrams_terms ng)
118 (nnng_terms nng)
119 returnA -< (n1, t1)
120 --}
121
122 getNgramsByNodeNodeIndexedJoin :: Query ( NgramsRead
123 , (NodeNgramReadNull
124 , (NodeNodeReadNull
125 , NodeReadNull
126 )
127 )
128 )
129 getNgramsByNodeNodeIndexedJoin = leftJoin4 queryNodeTable
130 queryNodeNodeTable
131 queryNodeNgramTable
132 queryNgramsTable
133 c1 c2 c3
134 where
135 c1 :: (NodeNodeRead, NodeRead) -> Column PGBool
136 c1 (nn,n) = nn_node1_id nn .== _node_id n
137
138 c2 :: ( NodeNgramRead
139 , (NodeNodeRead
140 , NodeReadNull
141 )
142 ) -> Column PGBool
143 c2 (nng,(nn',_)) = (nng_node_id nng) .== nn_node2_id nn'
144
145 c3 :: ( NgramsRead
146 , ( NodeNgramRead
147 , ( NodeNodeReadNull
148 , NodeReadNull
149 )
150 )
151 ) -> Column PGBool
152 c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
153
154
155 getNgramsByNodeNodeIndexedJoin5 :: Query ( NodeNodeNgramsRead
156 , (NgramsReadNull
157 , (NodeNgramReadNull
158 , (NodeNodeReadNull
159 , NodeReadNull
160 )
161 )
162 )
163 )
164 getNgramsByNodeNodeIndexedJoin5 = leftJoin5 queryNodeTable
165 queryNodeNodeTable
166 queryNodeNgramTable
167 queryNgramsTable
168 queryNodeNodeNgramsTable
169 c1 c2 c3 c4
170 where
171 c1 :: (NodeNodeRead, NodeRead) -> Column PGBool
172 c1 (nn,n) = nn_node1_id nn .== _node_id n
173
174 c2 :: ( NodeNgramRead
175 , (NodeNodeRead
176 , NodeReadNull
177 )
178 ) -> Column PGBool
179 c2 (nng,(nn',_)) = (nng_node_id nng) .== nn_node2_id nn'
180
181 c3 :: ( NgramsRead
182 , ( NodeNgramRead
183 , ( NodeNodeReadNull
184 , NodeReadNull
185 )
186 )
187 ) -> Column PGBool
188 c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
189
190 c4 :: ( NodeNodeNgramsRead
191 , (NgramsRead
192 , ( NodeNgramReadNull
193 , ( NodeNodeReadNull
194 , NodeReadNull
195 )
196 )
197 )
198 ) -> Column PGBool
199 c4 (nnng,(_,(_,(nn,_)))) = (toNullable $ nnng_node1_id nnng) .== (nn_node1_id nn)
200 .&& (toNullable $ nnng_node2_id nnng) .== (nn_node2_id nn)
201
202 --}
203
204 --{-
205
206 getNgramsElementsWithParentNodeId :: NodeId -> Cmd err (Map NgramsType [NgramsElement])
207 getNgramsElementsWithParentNodeId nId = do
208 ns <- getNgramsWithParentNodeId nId
209 pure $ fromListWith (<>)
210 [ (maybe (panic "error") identity $ fromNgramsTypeId nt,
211 [mkNgramsElement ng CandidateTerm Nothing mempty])
212 | (_,(nt,ng)) <- ns
213 ]
214
215
216 -------------------------------------------------------------------------
217 getNgramsWithParentNodeId :: NodeId -> Cmd err [(NodeId, (NgramsTypeId, Text))]
218 getNgramsWithParentNodeId nId = runOpaQuery (select nId)
219 where
220 select nId' = proc () -> do
221 (ng,(nng,n)) <- getNgramsWithParentNodeIdJoin -< ()
222 restrict -< _node_parentId n .== (toNullable $ pgNodeId nId')
223 restrict -< _node_typename n .== (toNullable $ pgInt4 $ nodeTypeId NodeDocument)
224 returnA -< (nng_node_id nng, (nng_ngramsType nng, ngrams_terms ng))
225 --}
226
227 getNgramsWithParentNodeIdJoin :: Query ( NgramsRead
228 , ( NodeNgramReadNull
229 , NodeReadNull
230 )
231 )
232 getNgramsWithParentNodeIdJoin = leftJoin3 queryNodeTable queryNodeNgramTable queryNgramsTable on1 on2
233 where
234 on1 :: (NodeNgramRead, NodeRead) -> Column PGBool
235 on1 (nng,n) = nng_node_id nng .== _node_id n
236
237 on2 :: (NgramsRead, (NodeNgramRead, NodeReadNull))-> Column PGBool
238 on2 (ng, (nng,_)) = ngrams_id ng .== nng_ngrams_id nng
239
240
241 countCorpusDocuments :: Roles -> Int -> Cmd err Int
242 countCorpusDocuments r cId = maybe 0 identity
243 <$> headMay
244 <$> map (\(PGS.Only n) -> n)
245 <$> runQuery' r cId
246 where
247 runQuery' RoleUser cId' = runPGSQuery
248 "SELECT count(*) from nodes_nodes nn WHERE nn.node1_id = ? AND nn.delete = False"
249 (PGS.Only cId')
250 runQuery' RoleMaster cId' = runPGSQuery
251 "SELECT count(*) from nodes n WHERE n.parent_id = ? AND n.typename = ?"
252 (cId', nodeTypeId NodeDocument)
253
254
255