2 Module : Gargantext.Database.Metrics.Count
3 Description : Ngram connection to the Database
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Count Ngrams by Context
14 {-# LANGUAGE QuasiQuotes #-}
15 {-# LANGUAGE Arrows #-}
16 {-# LANGUAGE NoImplicitPrelude #-}
17 {-# LANGUAGE OverloadedStrings #-}
18 {-# LANGUAGE QuasiQuotes #-}
19 {-# LANGUAGE RankNTypes #-}
21 module Gargantext.Database.Metrics.Count where
23 import Control.Arrow (returnA)
24 import Control.Lens (view)
25 import Data.Map.Strict (Map, fromListWith, elems)
26 import Data.Monoid (mempty)
27 import Data.Text (Text)
28 import Database.PostgreSQL.Simple.SqlQQ (sql)
29 import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement)
30 import Gargantext.Core.Types.Main (listTypeId, ListType(..))
31 import Gargantext.Database.Access
32 import Gargantext.Database.Config (nodeTypeId)
33 import Gargantext.Database.Queries.Join (leftJoin4, leftJoin5, leftJoin3)
34 import Gargantext.Database.Schema.Ngrams
35 import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsType(..), ngramsTypeId, Ngrams(..), NgramsIndexed(..), ngrams, ngramsTerms, fromNgramsTypeId)
36 import Gargantext.Database.Schema.Node
37 import Gargantext.Database.Schema.Node (HasNodeError(..))
38 import Gargantext.Database.Schema.NodeNgram
39 import Gargantext.Database.Schema.NodeNode
40 import Gargantext.Database.Schema.NodeNodeNgrams
41 import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
42 import Gargantext.Database.Utils
43 import Gargantext.Database.Utils (Cmd, runPGSQuery)
44 import Gargantext.Prelude hiding (sum)
45 import Gargantext.Text.Metrics.Count (Coocs, coocOn)
48 import qualified Database.PostgreSQL.Simple as PGS
50 getCoocByDocDev :: HasNodeError err => CorpusId -> ListId -> Cmd err (Map ([Text], [Text]) Int)
51 getCoocByDocDev cId lId = coocOn (\n-> [ view ( ngrams . ngramsTerms) n]) <$> getNgramsByDoc cId lId
53 getCoocByDoc :: CorpusId -> ListId -> Cmd err (Map (NgramsIndexed, NgramsIndexed) Coocs)
54 getCoocByDoc cId lId = coocOn identity <$> getNgramsByDoc cId lId
57 getNgramsByDoc :: CorpusId -> ListId -> Cmd err [[NgramsIndexed]]
58 getNgramsByDoc cId lId =
61 <$> map (\(nId, ngId, nt, n) -> (nId, [NgramsIndexed (Ngrams nt n) ngId]))
62 <$> getNgramsByDocDb cId lId
65 getNgramsByDocDb :: CorpusId -> ListId -> Cmd err [(NodeId, NgramsId, Text, Int)]
66 getNgramsByDocDb cId lId = runPGSQuery query params
68 params = (cId, lId, listTypeId GraphTerm, ngramsTypeId NgramsTerms)
72 SELECT n.id, ng.id, ng.terms, ng.n -- , list.parent_id
74 JOIN nodes_nodes nn ON nn.node2_id = n.id
75 JOIN nodes_ngrams nng ON nng.node_id = nn.node2_id
76 JOIN nodes_ngrams list ON list.ngrams_id = nng.ngrams_id
77 JOIN ngrams ng ON ng.id = nng.ngrams_id
78 WHERE nn.node1_id = ? -- CorpusId
79 AND list.node_id = ? -- ListId
80 AND list.list_type = ? -- GraphListId
81 AND list.ngrams_type = ? -- NgramsTypeId
86 getNgramsByNode :: NodeId -> NgramsType -> Cmd err [[Text]]
87 getNgramsByNode nId nt = elems
89 <$> map (\(i,t) -> (i,[t]))
90 <$> getNgramsByNodeNodeIndexed nId nt
92 -- | TODO add join with nodeNodeNgram (if it exists)
93 getNgramsByNodeNodeIndexed :: NodeId -> NgramsType -> Cmd err [(NodeId, Text)]
94 getNgramsByNodeNodeIndexed nId nt = runOpaQuery (select' nId)
96 select' nId' = proc () -> do
97 (ng,(nng,(nn,n))) <- getNgramsByNodeNodeIndexedJoin -< ()
98 restrict -< _node_id n .== toNullable (pgNodeId nId')
99 restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt)
100 restrict -< nn_delete nn ./= (toNullable . pgBool) True
101 returnA -< (nng_node_id nng, ngrams_terms ng)
105 getNgramsByNodeIndexed' :: NodeId -> NgramsType -> Cmd err [(NodeId, Maybe Text)]
106 getNgramsByNodeIndexed' nId nt = runOpaQuery (select' nId)
108 select' nId' = proc () -> do
109 (nnng,(ng,(nng,(_,n)))) <- getNgramsByNodeIndexedJoin5 -< ()
110 restrict -< _node_id n .== toNullable (pgNodeId nId')
111 restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt)
113 let node_id' = ifThenElse (isNull $ toNullable $ nnng_node1_id nnng)
116 let t1 = ifThenElse (isNull $ toNullable $ nnng_node1_id nnng)
122 getNgramsByNodeNodeIndexedJoin :: Query ( NgramsRead
129 getNgramsByNodeNodeIndexedJoin = leftJoin4 queryNodeTable
135 c1 :: (NodeNodeRead, NodeRead) -> Column PGBool
136 c1 (nn,n) = nn_node1_id nn .== _node_id n
138 c2 :: ( NodeNgramRead
143 c2 (nng,(nn',_)) = (nng_node_id nng) .== nn_node2_id nn'
152 c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
155 getNgramsByNodeNodeIndexedJoin5 :: Query ( NodeNodeNgramsRead
164 getNgramsByNodeNodeIndexedJoin5 = leftJoin5 queryNodeTable
168 queryNodeNodeNgramsTable
171 c1 :: (NodeNodeRead, NodeRead) -> Column PGBool
172 c1 (nn,n) = nn_node1_id nn .== _node_id n
174 c2 :: ( NodeNgramRead
179 c2 (nng,(nn',_)) = (nng_node_id nng) .== nn_node2_id nn'
188 c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
190 c4 :: ( NodeNodeNgramsRead
192 , ( NodeNgramReadNull
199 c4 (nnng,(_,(_,(nn,_)))) = (toNullable $ nnng_node1_id nnng) .== (nn_node1_id nn)
200 .&& (toNullable $ nnng_node2_id nnng) .== (nn_node2_id nn)
206 getNgramsElementsWithParentNodeId :: NodeId -> Cmd err (Map NgramsType [NgramsElement])
207 getNgramsElementsWithParentNodeId nId = do
208 ns <- getNgramsWithParentNodeId nId
209 pure $ fromListWith (<>)
210 [ (maybe (panic "error") identity $ fromNgramsTypeId nt,
211 [mkNgramsElement ng CandidateTerm Nothing mempty])
216 -------------------------------------------------------------------------
217 getNgramsWithParentNodeId :: NodeId -> Cmd err [(NodeId, (NgramsTypeId, Text))]
218 getNgramsWithParentNodeId nId = runOpaQuery (select nId)
220 select nId' = proc () -> do
221 (ng,(nng,n)) <- getNgramsWithParentNodeIdJoin -< ()
222 restrict -< _node_parentId n .== (toNullable $ pgNodeId nId')
223 restrict -< _node_typename n .== (toNullable $ pgInt4 $ nodeTypeId NodeDocument)
224 returnA -< (nng_node_id nng, (nng_ngramsType nng, ngrams_terms ng))
227 getNgramsWithParentNodeIdJoin :: Query ( NgramsRead
228 , ( NodeNgramReadNull
232 getNgramsWithParentNodeIdJoin = leftJoin3 queryNodeTable queryNodeNgramTable queryNgramsTable on1 on2
234 on1 :: (NodeNgramRead, NodeRead) -> Column PGBool
235 on1 (nng,n) = nng_node_id nng .== _node_id n
237 on2 :: (NgramsRead, (NodeNgramRead, NodeReadNull))-> Column PGBool
238 on2 (ng, (nng,_)) = ngrams_id ng .== nng_ngrams_id nng
241 countCorpusDocuments :: Roles -> Int -> Cmd err Int
242 countCorpusDocuments r cId = maybe 0 identity
244 <$> map (\(PGS.Only n) -> n)
247 runQuery' RoleUser cId' = runPGSQuery
248 "SELECT count(*) from nodes_nodes nn WHERE nn.node1_id = ? AND nn.delete = False"
250 runQuery' RoleMaster cId' = runPGSQuery
251 "SELECT count(*) from nodes n WHERE n.parent_id = ? AND n.typename = ?"
252 (cId', nodeTypeId NodeDocument)