2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, {-recursiveClustering,-} recursiveClustering', setNodes2clusterNodes)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
59 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
60 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
61 instance FromJSON BridgenessMethod
62 instance ToJSON BridgenessMethod
63 instance ToSchema BridgenessMethod
64 instance Arbitrary BridgenessMethod where
65 arbitrary = elements [ minBound .. maxBound ]
68 -------------------------------------------------------------
69 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
70 -- defaultClustering x = pure $ BAC.defaultClustering x
71 defaultClustering x = spinglass 1 x
73 -------------------------------------------------------------
74 type Threshold = Double
76 cooc2graph' :: Ord t => Similarity
79 -> Map (Index, Index) Double
80 cooc2graph' distance threshold myCooc
81 = Map.filter (> threshold)
85 Conditional -> map2mat Triangle 0 tiSize
86 _ -> map2mat Square 0 tiSize
87 $ Map.filter (> 1) myCooc'
90 (ti, _) = createIndices myCooc
92 myCooc' = toIndex ti myCooc
95 -- coocurrences graph computation
96 cooc2graphWith :: PartitionMethod
102 -> HashMap (NgramsTerm, NgramsTerm) Int
104 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
105 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
106 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
107 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
108 -- TODO: change these options, or make them configurable in UI?
110 cooc2graphWith' :: Partitions
116 -> HashMap (NgramsTerm, NgramsTerm) Int
118 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold strength myCooc = do
119 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
120 distanceMap `seq` diag `seq` ti `seq` return ()
122 partitions <- if (Map.size distanceMap > 0)
123 then recursiveClustering' (spinglass' 1) distanceMap
124 else panic $ Text.intercalate " " [ "I can not compute the graph you request"
125 , "because either the quantity of documents"
126 , "or the quantity of terms"
128 , "Solution: add more either Documents or Map Terms to your analysis."
129 , "Follow the available tutorials on the Training EcoSystems."
130 , "Ask your co-users of GarganText how to have access to it."
132 length partitions `seq` return ()
135 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
136 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap
138 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
141 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Distributional threshold strength myCooc = do
142 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
143 distanceMap `seq` diag `seq` ti `seq` return ()
145 partitions <- if (Map.size distanceMap > 0)
146 then recursiveClustering (spinglass 1) distanceMap
147 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
148 , "Maybe you should add more Map Terms in your list"
151 length partitions `seq` return ()
154 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
155 !bridgeness' = bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
157 pure $ data2graph multi ti diag bridgeness' confluence' partitions
164 doSimilarityMap :: Similarity
167 -> HashMap (NgramsTerm, NgramsTerm) Int
168 -> ( Map (Int,Int) Double
169 , Map (Index, Index) Int
170 , Map NgramsTerm Index
173 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
175 myCooc' = Map.fromList $ HashMap.toList myCooc
177 (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
179 $ HashMap.toList myCooc
181 (ti, _it) = createIndices theMatrix
184 similarities = (\m -> m `seq` m)
185 $ (\m -> m `seq` measure Conditional m)
186 $ (\m -> m `seq` map2mat Square 0 tiSize m)
187 $ theMatrix `seq` toIndex ti theMatrix
189 links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
190 distanceMap = Map.fromList
192 $ (if strength == Weak then List.reverse else identity)
195 $ Map.filter (> threshold)
196 $ similarities `seq` mat2map similarities
198 doSimilarityMap distriType threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
201 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
203 $ HashMap.toList myCooc
205 (ti, _it) = createIndices theMatrix
208 similarities = (\m -> m `seq` m)
209 $ (\m -> m `seq` measure distriType m)
210 $ (\m -> m `seq` map2mat Square 0 tiSize m)
211 $ theMatrix `seq` toIndex ti theMatrix
213 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
215 distanceMap = Map.fromList
217 $ (if strength == Weak then List.reverse else identity)
221 $ (\m -> m `seq` Map.filter (> threshold) m)
222 $ similarities `seq` mat2map similarities
224 ----------------------------------------------------------
225 -- | From data to Graph
226 type Occurrences = Int
228 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
229 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
230 if HashSet.member t s1
234 data2graph :: MultiPartite
235 -> Map NgramsTerm Int
236 -> Map (Int, Int) Occurrences
237 -> Map (Int, Int) Double
238 -> Map (Int, Int) Double
241 data2graph multi labels' occurences bridge conf partitions =
242 Graph { _graph_nodes = nodes
243 , _graph_edges = edges
244 , _graph_metadata = Nothing
249 nodes = map (setCoord ForceAtlas labels bridge)
250 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
251 , node_type = nodeTypeWith multi label
252 , node_id = (cs . show) n
253 , node_label = unNgramsTerm label
257 Attributes { clust_default = fromMaybe 0
258 (Map.lookup n community_id_by_node_id)
263 | (label, n) <- labels
264 , Set.member n toKeep
267 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
269 edges = [ Edge { edge_source = cs (show s)
270 , edge_hidden = Nothing
271 , edge_target = cs (show t)
272 , edge_weight = weight
273 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
274 , edge_id = cs (show i)
276 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
281 community_id_by_node_id = Map.fromList
282 $ map nodeId2comId partitions
284 labels = Map.toList labels'
287 ------------------------------------------------------------------------
289 data Layout = KamadaKawai | ACP | ForceAtlas
292 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
293 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
299 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
300 setCoord l labels m (n,node) = node { node_x_coord = x
304 (x,y) = getCoord l labels m n
310 -> Map (Int, Int) Double
313 getCoord KamadaKawai _ _m _n = undefined -- layout m n
315 getCoord ForceAtlas _ _ n = (sin d, cos d)
319 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
321 $ pcaReduceTo (Dimension 2)
324 to2d :: Vec.Vector Double -> (Double, Double)
327 ds = take 2 $ Vec.toList v
331 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
332 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
336 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
337 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
338 ------------------------------------------------------------------------
340 -- | KamadaKawai Layout
341 -- TODO TEST: check labels, nodeId and coordinates
342 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
343 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
345 coord :: (Map Int (Double,Double))
346 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
347 --p = Layout.defaultLGL
348 p = Layout.kamadaKawai
349 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
351 -----------------------------------------------------------------------------
353 cooc2graph'' :: Ord t => Similarity
356 -> Map (Index, Index) Double
357 cooc2graph'' distance threshold myCooc = neighbourMap
359 (ti, _) = createIndices myCooc
360 myCooc' = toIndex ti myCooc
361 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
362 distanceMat = measure distance matCooc
363 neighbourMap = filterByNeighbours threshold
364 $ mat2map distanceMat
367 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
368 filterByNeighbours threshold distanceMap = filteredMap
371 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
372 filteredMap :: Map (Index, Index) Double
373 filteredMap = Map.fromList
376 let selected = List.reverse
380 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
381 in List.take (round threshold) selected