import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Debug.Trace (trace)
+import Gargantext.Core
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Data.HashMap.Strict.Utils as HM
-import Gargantext.Database.Admin.Config (nodeTypeId)
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
$ HM.toList m
------------------------------------------------------------------------
-getNodesByNgramsUser :: CorpusId
+getNodesByNgramsUser :: HasDBid NodeType
+ => CorpusId
-> NgramsType
-> Cmd err (HashMap NgramsTerm (Set NodeId))
getNodesByNgramsUser cId nt =
<$> selectNgramsByNodeUser cId nt
where
- selectNgramsByNodeUser :: CorpusId
+ selectNgramsByNodeUser :: HasDBid NodeType
+ => CorpusId
-> NgramsType
-> Cmd err [(NodeId, Text)]
selectNgramsByNodeUser cId' nt' =
runPGSQuery queryNgramsByNodeUser
( cId'
- , nodeTypeId NodeDocument
+ , toDBid NodeDocument
, ngramsTypeId nt'
-- , 100 :: Int -- limit
-- , 0 :: Int -- offset
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
- AND n.typename = ? -- NodeTypeId
+ AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
|]
------------------------------------------------------------------------
-- TODO add groups
-getOccByNgramsOnlyFast :: CorpusId
+getOccByNgramsOnlyFast :: HasDBid NodeType
+ => CorpusId
-> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int)
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
+-- | Per-term occurrence counts, computed from the rows returned by
+-- 'selectNgramsOccurrencesOnlyByNodeUser_withSample'.
+--
+-- The 'Int' argument is forwarded unchanged to that selector (where it is
+-- bound to the sampling placeholder of the SQL query). Rows that share the
+-- same term are merged by adding their counts.
+getOccByNgramsOnlyFast_withSample :: HasDBid NodeType
+                                  => CorpusId
+                                  -> Int
+                                  -> NgramsType
+                                  -> [NgramsTerm]
+                                  -> Cmd err (HashMap NgramsTerm Int)
+getOccByNgramsOnlyFast_withSample corpusId sampleSize ngramsType terms =
+  sumPerTerm <$> selectNgramsOccurrencesOnlyByNodeUser_withSample corpusId sampleSize ngramsType terms
+  where
+    -- Collapse duplicate terms, summing their counts.
+    sumPerTerm rows = HM.fromListWith (+) rows
+
+
+
+
getOccByNgramsOnlyFast' :: CorpusId
-> ListId
-> NgramsType
-- just slower than getOccByNgramsOnlyFast
-getOccByNgramsOnlySlow :: NodeType
+getOccByNgramsOnlySlow :: HasDBid NodeType
+ => NodeType
-> CorpusId
-> [ListId]
-> NgramsType
getScore' NodeDocument = getNgramsByDocOnlyUser
getScore' _ = getNodesByNgramsOnlyUser
-getOccByNgramsOnlySafe :: CorpusId
+getOccByNgramsOnlySafe :: HasDBid NodeType
+ => CorpusId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
pure slow
-selectNgramsOccurrencesOnlyByNodeUser :: CorpusId
+selectNgramsOccurrencesOnlyByNodeUser :: HasDBid NodeType
+ => CorpusId
-> NgramsType
-> [NgramsTerm]
-> Cmd err [(NgramsTerm, Int)]
runPGSQuery queryNgramsOccurrencesOnlyByNodeUser
( Values fields ((DPS.Only . unNgramsTerm) <$> tms)
, cId
- , nodeTypeId NodeDocument
+ , toDBid NodeDocument
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
+
+
-- same as queryNgramsOnlyByNodeUser but using COUNT on the node ids.
-- Question: with the grouping is the result exactly the same (since Set NodeId for
-- equivalent ngrams intersections are not empty)
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
- AND n.typename = ? -- NodeTypeId
+ AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
|]
+
+-- | Run 'queryNgramsOccurrencesOnlyByNodeUser_withSample' and wrap the term
+-- column of every returned row in 'NgramsTerm'.
+--
+-- The query placeholders are filled, in order, with: the 'Int' argument,
+-- the DB id of 'NodeDocument', the corpus id, the requested terms (sent as
+-- a one-column @text@ VALUES list), the corpus id again, and the DB id of
+-- the ngrams type.
+selectNgramsOccurrencesOnlyByNodeUser_withSample :: HasDBid NodeType
+                                                 => CorpusId
+                                                 -> Int
+                                                 -> NgramsType
+                                                 -> [NgramsTerm]
+                                                 -> Cmd err [(NgramsTerm, Int)]
+selectNgramsOccurrencesOnlyByNodeUser_withSample corpusId sampleSize ngramsType terms =
+  fmap (fmap (first NgramsTerm))
+       (runPGSQuery queryNgramsOccurrencesOnlyByNodeUser_withSample queryParams)
+  where
+    -- One row per requested term, unwrapped to its raw text.
+    termRows = fmap (DPS.Only . unNgramsTerm) terms
+    -- Tuple order must follow the order of the @?@ placeholders in the query.
+    queryParams =
+      ( sampleSize
+      , toDBid NodeDocument
+      , corpusId
+      , Values [QualifiedIdentifier Nothing "text"] termRows
+      , corpusId
+      , ngramsTypeId ngramsType
+      )
+
+-- | SQL counting, per (document node, term) group, the @node_node_ngrams@
+-- rows that match the given terms, restricted to nodes in @nodes_sample@.
+--
+-- Placeholders, in order of appearance:
+--
+--   1. argument of @TABLESAMPLE SYSTEM_ROWS@
+--   2. @nodes.typename@ used to build the sample
+--   3. @nodes_nodes.node1_id@ used to build the sample
+--   4. the @input_rows(terms)@ relation
+--   5. @nodes_nodes.node1_id@ of the main query
+--   6. @node_node_ngrams.ngrams_type@
+--
+-- Each result row is @(ng.terms, COUNT(nng.node2_id))@; because of the
+-- @GROUP BY nng.node2_id, ng.terms@ the same term can appear in several rows.
+--
+-- NOTE(review): per the PostgreSQL documentation TABLESAMPLE is applied
+-- before JOIN/WHERE filtering, so the sample is presumably drawn from the
+-- whole @nodes@ table and only then restricted to the corpus -- confirm this
+-- is intended. SYSTEM_ROWS presumably requires the @tsm_system_rows@
+-- extension to be installed -- TODO confirm.
+queryNgramsOccurrencesOnlyByNodeUser_withSample :: DPS.Query
+queryNgramsOccurrencesOnlyByNodeUser_withSample = [sql|
+ WITH nodes_sample AS (SELECT id FROM nodes n TABLESAMPLE SYSTEM_ROWS (?)
+ JOIN nodes_nodes nn ON n.id = nn.node2_id
+ WHERE n.typename = ?
+ AND nn.node1_id = ?),
+ input_rows(terms) AS (?)
+ SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
+ JOIN ngrams ng ON nng.ngrams_id = ng.id
+ JOIN input_rows ir ON ir.terms = ng.terms
+ JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
+ JOIN nodes_sample n ON nn.node2_id = n.id
+ WHERE nn.node1_id = ? -- CorpusId
+ AND nng.ngrams_type = ? -- NgramsTypeId
+ AND nn.category > 0
+ GROUP BY nng.node2_id, ng.terms
+ |]
+
+
+
queryNgramsOccurrencesOnlyByNodeUser' :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser' = [sql|
WITH input_rows(terms) AS (?)
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
- AND n.typename = ? -- NodeTypeId
+ AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
|]
------------------------------------------------------------------------
-getNodesByNgramsOnlyUser :: CorpusId
+getNodesByNgramsOnlyUser :: HasDBid NodeType
+ => CorpusId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
(splitEvery 1000 ngs)
-getNgramsByNodeOnlyUser :: NodeId
+getNgramsByNodeOnlyUser :: HasDBid NodeType
+ => NodeId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
(splitEvery 1000 ngs)
------------------------------------------------------------------------
-selectNgramsOnlyByNodeUser :: CorpusId
+selectNgramsOnlyByNodeUser :: HasDBid NodeType
+ => CorpusId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
, Values [QualifiedIdentifier Nothing "int4"]
(DPS.Only <$> (map (\(NodeId n) -> n) ls))
, cId
- , nodeTypeId NodeDocument
+ , toDBid NodeDocument
, ngramsTypeId nt
)
where
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
- AND n.typename = ? -- NodeTypeId
+ AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY ng.terms, nng.node2_id
|]
-selectNgramsOnlyByNodeUser' :: CorpusId
+selectNgramsOnlyByNodeUser' :: HasDBid NodeType
+ => CorpusId
-> [ListId]
-> NgramsType
-> [Text]
, Values [QualifiedIdentifier Nothing "int4"]
(DPS.Only <$> (map (\(NodeId n) -> n) ls))
, cId
- , nodeTypeId NodeDocument
+ , toDBid NodeDocument
, ngramsTypeId nt
)
where
------------------------------------------------------------------------
-- | TODO filter by language, database, any social field
-getNodesByNgramsMaster :: UserCorpusId -> MasterCorpusId -> Cmd err (HashMap Text (Set NodeId))
+getNodesByNgramsMaster :: HasDBid NodeType
+ => UserCorpusId -> MasterCorpusId -> Cmd err (HashMap Text (Set NodeId))
getNodesByNgramsMaster ucId mcId = unionsWith (<>)
. map (HM.fromListWith (<>) . map (\(n,t) -> (t, Set.singleton n)))
-- . takeWhile (not . List.null)
-- . takeWhile (\l -> List.length l > 3)
<$> mapM (selectNgramsByNodeMaster 1000 ucId mcId) [0,500..10000]
-selectNgramsByNodeMaster :: Int
+selectNgramsByNodeMaster :: HasDBid NodeType
+ => Int
-> UserCorpusId
-> MasterCorpusId
-> Int
queryNgramsByNodeMaster'
( ucId
, ngramsTypeId NgramsTerms
- , nodeTypeId NodeDocument
+ , toDBid NodeDocument
, p
- , nodeTypeId NodeDocument
+ , toDBid NodeDocument
, p
, n
, mcId
- , nodeTypeId NodeDocument
+ , toDBid NodeDocument
, ngramsTypeId NgramsTerms
)
JOIN node_node_ngrams nng ON nng.node2_id = n.id
JOIN ngrams ng ON nng.ngrams_id = ng.id
WHERE nn.node1_id = ? -- UserCorpusId
- -- AND n.typename = ? -- NodeTypeId
+ -- AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
AND node_pos(n.id,?) >= ?
JOIN node_node_ngrams nng ON n.id = nng.node2_id
JOIN ngrams ng ON ng.id = nng.ngrams_id
- WHERE n.parent_id = ? -- Master Corpus NodeTypeId
- AND n.typename = ? -- NodeTypeId
+ WHERE n.parent_id = ? -- Master Corpus toDBid
+ AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY n.id, ng.terms
)