2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, recursiveClustering, recursiveClustering', setNodes2clusterNodes)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
59 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
60 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
61 instance FromJSON BridgenessMethod
62 instance ToJSON BridgenessMethod
63 instance ToSchema BridgenessMethod
64 instance Arbitrary BridgenessMethod where
65 arbitrary = elements [ minBound .. maxBound ]
68 -------------------------------------------------------------
69 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
70 -- defaultClustering x = pure $ BAC.defaultClustering x
71 defaultClustering x = spinglass 1 x
73 -------------------------------------------------------------
74 type Threshold = Double
77 cooc2graph' :: Ord t => Similarity
80 -> Map (Index, Index) Double
81 cooc2graph' distance threshold myCooc
82 = Map.filter (> threshold)
86 Conditional -> map2mat Triangle 0 tiSize
87 Distributional -> map2mat Square 0 tiSize
88 $ Map.filter (> 1) myCooc'
91 (ti, _) = createIndices myCooc
93 myCooc' = toIndex ti myCooc
97 -- coocurrences graph computation
98 cooc2graphWith :: PartitionMethod
104 -> HashMap (NgramsTerm, NgramsTerm) Int
106 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
107 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
108 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
109 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
110 -- TODO: change these options, or make them configurable in UI?
112 cooc2graphWith' :: Partitions
118 -> HashMap (NgramsTerm, NgramsTerm) Int
120 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Conditional threshold strength myCooc = do
121 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
122 distanceMap `seq` diag `seq` ti `seq` return ()
124 partitions <- if (Map.size distanceMap > 0)
125 -- then recursiveClustering doPartitions distanceMap
126 then recursiveClustering' (spinglass' 1) distanceMap
127 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
128 , "Maybe you should add more Map Terms in your list"
131 length partitions `seq` return ()
134 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
135 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0) distanceMap
137 !bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
138 then bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
139 else bridgeness (Bridgeness_Advanced similarity confluence') distanceMap
141 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
143 cooc2graphWith' doPartitions bridgenessMethod multi Distributional threshold strength myCooc = do
144 let (distanceMap, diag, ti) = doSimilarityMap Distributional threshold strength myCooc
145 distanceMap `seq` diag `seq` ti `seq` return ()
147 partitions <- if (Map.size distanceMap > 0)
148 then recursiveClustering doPartitions distanceMap
149 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
150 , "Maybe you should add more Map Terms in your list"
153 length partitions `seq` return ()
156 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
157 !bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
158 then bridgeness (Bridgeness_Basic partitions 10.0) distanceMap
159 else bridgeness (Bridgeness_Advanced Distributional confluence') distanceMap
161 pure $ data2graph multi ti diag bridgeness' confluence' partitions
170 doSimilarityMap :: Similarity
173 -> HashMap (NgramsTerm, NgramsTerm) Int
174 -> ( Map (Int,Int) Double
175 , Map (Index, Index) Int
176 , Map NgramsTerm Index
179 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
181 myCooc' = Map.fromList $ HashMap.toList myCooc
183 (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
185 $ HashMap.toList myCooc
187 (ti, _it) = createIndices theMatrix
190 similarities = (\m -> m `seq` m)
191 $ (\m -> m `seq` measure Conditional m)
192 $ (\m -> m `seq` map2mat Square 0 tiSize m)
193 $ theMatrix `seq` toIndex ti theMatrix
195 links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
196 distanceMap = Map.fromList
198 $ (if strength == Weak then List.reverse else identity)
201 $ Map.filter (> threshold)
202 -- $ conditional myCooc
203 $ similarities `seq` mat2map similarities
205 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
208 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
210 $ HashMap.toList myCooc
212 (ti, _it) = createIndices theMatrix
215 similarities = (\m -> m `seq` m)
216 $ (\m -> m `seq` measure Distributional m)
217 $ (\m -> m `seq` map2mat Square 0 tiSize m)
218 $ theMatrix `seq` toIndex ti theMatrix
220 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
222 distanceMap = Map.fromList
224 $ (if strength == Weak then List.reverse else identity)
228 $ (\m -> m `seq` Map.filter (> threshold) m)
229 $ similarities `seq` mat2map similarities
231 ----------------------------------------------------------
232 -- | From data to Graph
233 type Occurrences = Int
235 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
236 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
237 if HashSet.member t s1
242 data2graph :: MultiPartite
243 -> Map NgramsTerm Int
244 -> Map (Int, Int) Occurrences
245 -> Map (Int, Int) Double
246 -> Map (Int, Int) Double
249 data2graph multi labels' occurences bridge conf partitions =
250 Graph { _graph_nodes = nodes
251 , _graph_edges = edges
252 , _graph_metadata = Nothing
257 nodes = map (setCoord ForceAtlas labels bridge)
258 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
259 , node_type = nodeTypeWith multi label
260 , node_id = (cs . show) n
261 , node_label = unNgramsTerm label
265 Attributes { clust_default = fromMaybe 0
266 (Map.lookup n community_id_by_node_id)
271 | (label, n) <- labels
272 , Set.member n toKeep
275 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
277 edges = [ Edge { edge_source = cs (show s)
278 , edge_hidden = Nothing
279 , edge_target = cs (show t)
280 , edge_weight = weight
281 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
282 , edge_id = cs (show i)
284 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
289 community_id_by_node_id = Map.fromList
290 $ map nodeId2comId partitions
292 labels = Map.toList labels'
295 ------------------------------------------------------------------------
297 data Layout = KamadaKawai | ACP | ForceAtlas
300 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
301 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
307 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
308 setCoord l labels m (n,node) = node { node_x_coord = x
312 (x,y) = getCoord l labels m n
318 -> Map (Int, Int) Double
321 getCoord KamadaKawai _ _m _n = undefined -- layout m n
323 getCoord ForceAtlas _ _ n = (sin d, cos d)
327 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
329 $ pcaReduceTo (Dimension 2)
332 to2d :: Vec.Vector Double -> (Double, Double)
335 ds = take 2 $ Vec.toList v
339 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
340 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
344 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
345 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
346 ------------------------------------------------------------------------
348 -- | KamadaKawai Layout
349 -- TODO TEST: check labels, nodeId and coordinates
350 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
351 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
353 coord :: (Map Int (Double,Double))
354 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
355 --p = Layout.defaultLGL
356 p = Layout.kamadaKawai
357 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
359 -----------------------------------------------------------------------------
361 cooc2graph'' :: Ord t => Similarity
364 -> Map (Index, Index) Double
365 cooc2graph'' distance threshold myCooc = neighbourMap
367 (ti, _) = createIndices myCooc
368 myCooc' = toIndex ti myCooc
369 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
370 distanceMat = measure distance matCooc
371 neighbourMap = filterByNeighbours threshold
372 $ mat2map distanceMat
375 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
376 filterByNeighbours threshold distanceMap = filteredMap
379 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
380 filteredMap :: Map (Index, Index) Double
381 filteredMap = Map.fromList
384 let selected = List.reverse
388 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
389 in List.take (round threshold) selected