2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Text as Text
44 import qualified Data.Vector.Storable as Vec
45 import qualified Graph.BAC.ProxemyOptim as BAC
46 import qualified IGraph as Igraph
47 import qualified IGraph.Algorithms.Layout as Layout
50 data PartitionMethod = Spinglass | Confluence | Infomap
51 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
52 instance FromJSON PartitionMethod
53 instance ToJSON PartitionMethod
54 instance ToSchema PartitionMethod
55 instance Arbitrary PartitionMethod where
56 arbitrary = elements [ minBound .. maxBound ]
59 -------------------------------------------------------------
60 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
61 -- defaultClustering x = pure $ BAC.defaultClustering x
62 defaultClustering x = spinglass 1 x
64 -------------------------------------------------------------
65 type Threshold = Double
68 cooc2graph' :: Ord t => Distance
71 -> Map (Index, Index) Double
72 cooc2graph' distance threshold myCooc
73 = Map.filter (> threshold)
77 Conditional -> map2mat Triangle 0 tiSize
78 Distributional -> map2mat Square 0 tiSize
79 $ Map.filter (> 1) myCooc'
82 (ti, _) = createIndices myCooc
84 myCooc' = toIndex ti myCooc
88 -- coocurrences graph computation
89 cooc2graphWith :: PartitionMethod
93 -> HashMap (NgramsTerm, NgramsTerm) Int
95 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
96 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
97 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
98 -- TODO: change these options, or make them configurable in UI?
101 cooc2graphWith' :: ToComId a
106 -> HashMap (NgramsTerm, NgramsTerm) Int
108 cooc2graphWith' doPartitions distance threshold strength myCooc = do
109 let (distanceMap, diag, ti) = doDistanceMap distance threshold strength myCooc
110 distanceMap `seq` diag `seq` ti `seq` return ()
113 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
114 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
115 -- printDebug "similarities" similarities
118 partitions <- if (Map.size distanceMap > 0)
119 then doPartitions distanceMap
120 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
121 , "Maybe you should add more Map Terms in your list"
122 , "Tutorial: link todo"
124 length partitions `seq` return ()
129 (as, bs) = List.unzip $ Map.keys distanceMap
130 n' = Set.size $ Set.fromList $ as <> bs
131 !bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
132 !confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
133 pure $ data2graph ti diag bridgeness' confluence' partitions
137 doDistanceMap :: Distance
140 -> HashMap (NgramsTerm, NgramsTerm) Int
141 -> ( Map (Int,Int) Double
142 , Map (Index, Index) Int
143 , Map NgramsTerm Index
145 doDistanceMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
148 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
150 $ HashMap.toList myCooc
152 (ti, _it) = createIndices theMatrix
155 similarities = (\m -> m `seq` m)
156 $ (\m -> m `seq` measure Distributional m)
157 $ (\m -> m `seq` map2mat Square 0 tiSize m)
158 $ theMatrix `seq` toIndex ti theMatrix
160 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
162 distanceMap = Map.fromList
164 $ (if strength == Weak then List.reverse else identity)
168 $ (\m -> m `seq` Map.filter (> threshold) m)
169 $ similarities `seq` mat2map similarities
171 doDistanceMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
173 myCooc' = Map.fromList $ HashMap.toList myCooc
174 (ti, _it) = createIndices myCooc'
175 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
176 distanceMap = toIndex ti
179 $ (if strength == Weak then List.reverse else identity)
182 $ HashMap.filter (> threshold)
185 ----------------------------------------------------------
186 -- | From data to Graph
188 type Occurrences = Int
190 data2graph :: ToComId a
191 => Map NgramsTerm Int
192 -> Map (Int, Int) Occurrences
193 -> Map (Int, Int) Double
194 -> Map (Int, Int) Double
197 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
198 , _graph_edges = edges
199 , _graph_metadata = Nothing
203 nodes = map (setCoord ForceAtlas labels bridge)
204 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
205 , node_type = Terms -- or Unknown
206 , node_id = cs (show n)
207 , node_label = unNgramsTerm l
210 , node_attributes = Attributes { clust_default = fromMaybe 0
211 (Map.lookup n community_id_by_node_id)
217 , Set.member n toKeep
220 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
222 edges = [ Edge { edge_source = cs (show s)
223 , edge_target = cs (show t)
224 , edge_weight = weight
225 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
226 , edge_id = cs (show i)
228 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
233 community_id_by_node_id = Map.fromList
234 $ map nodeId2comId partitions
236 labels = Map.toList labels'
239 ------------------------------------------------------------------------
241 data Layout = KamadaKawai | ACP | ForceAtlas
244 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
245 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
251 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
252 setCoord l labels m (n,node) = node { node_x_coord = x
256 (x,y) = getCoord l labels m n
262 -> Map (Int, Int) Double
265 getCoord KamadaKawai _ _m _n = undefined -- layout m n
267 getCoord ForceAtlas _ _ n = (sin d, cos d)
271 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
273 $ pcaReduceTo (Dimension 2)
276 to2d :: Vec.Vector Double -> (Double, Double)
279 ds = take 2 $ Vec.toList v
283 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
284 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
288 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
289 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
290 ------------------------------------------------------------------------
292 -- | KamadaKawai Layout
293 -- TODO TEST: check labels, nodeId and coordinates
294 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
295 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
297 coord :: (Map Int (Double,Double))
298 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
299 --p = Layout.defaultLGL
300 p = Layout.kamadaKawai
301 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
303 -----------------------------------------------------------------------------
305 cooc2graph'' :: Ord t => Distance
308 -> Map (Index, Index) Double
309 cooc2graph'' distance threshold myCooc = neighbourMap
311 (ti, _) = createIndices myCooc
312 myCooc' = toIndex ti myCooc
313 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
314 distanceMat = measure distance matCooc
315 neighbourMap = filterByNeighbours threshold
316 $ mat2map distanceMat
319 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
320 filterByNeighbours threshold distanceMap = filteredMap
323 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
324 filteredMap :: Map (Index, Index) Double
325 filteredMap = Map.fromList
328 let selected = List.reverse
332 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
333 in List.take (round threshold) selected