2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Text as Text
44 import qualified Data.Vector.Storable as Vec
45 import qualified Graph.BAC.ProxemyOptim as BAC
46 import qualified IGraph as Igraph
47 import qualified IGraph.Algorithms.Layout as Layout
50 data PartitionMethod = Spinglass | Confluence | Infomap
51 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
52 instance FromJSON PartitionMethod
53 instance ToJSON PartitionMethod
54 instance ToSchema PartitionMethod
55 instance Arbitrary PartitionMethod where
56 arbitrary = elements [ minBound .. maxBound ]
59 -------------------------------------------------------------
60 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
61 -- defaultClustering x = pure $ BAC.defaultClustering x
62 defaultClustering x = spinglass 1 x
64 -------------------------------------------------------------
65 type Threshold = Double
68 cooc2graph' :: Ord t => Distance
71 -> Map (Index, Index) Double
72 cooc2graph' distance threshold myCooc
73 = Map.filter (> threshold)
77 Conditional -> map2mat Triangle 0 tiSize
78 Distributional -> map2mat Square 0 tiSize
79 $ Map.filter (> 1) myCooc'
82 (ti, _) = createIndices myCooc
84 myCooc' = toIndex ti myCooc
88 -- coocurrences graph computation
89 cooc2graphWith :: PartitionMethod
93 -> HashMap (NgramsTerm, NgramsTerm) Int
95 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
96 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
97 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
98 -- TODO: change these options, or make them configurable in UI?
101 cooc2graphWith' :: ToComId a
106 -> HashMap (NgramsTerm, NgramsTerm) Int
108 cooc2graphWith' doPartitions distance threshold strength myCooc = do
109 let (distanceMap, diag, ti) = doDistanceMap distance threshold strength myCooc
110 distanceMap `seq` diag `seq` ti `seq` return ()
113 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
114 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
115 -- printDebug "similarities" similarities
118 partitions <- if (Map.size distanceMap > 0)
119 then doPartitions distanceMap
120 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
121 , "Maybe you should add more Map Terms in your list"
122 , "Tutorial: link todo"
124 length partitions `seq` return ()
129 (as, bs) = List.unzip $ Map.keys distanceMap
130 n' = Set.size $ Set.fromList $ as <> bs
131 !bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
132 !confluence' = Map.empty -- BAC.computeConfluences 3 (Map.keys bridgeness') True
133 pure $ data2graph ti diag bridgeness' confluence' partitions
137 doDistanceMap :: Distance
140 -> HashMap (NgramsTerm, NgramsTerm) Int
141 -> ( Map (Int,Int) Double
142 , Map (Index, Index) Int
143 , Map NgramsTerm Index
145 doDistanceMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
148 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
150 $ HashMap.toList myCooc
152 (ti, _it) = createIndices theMatrix
155 similarities = (\m -> m `seq` m)
156 $ (\m -> m `seq` measure Distributional m)
157 $ (\m -> m `seq` map2mat Square 0 tiSize m)
158 $ theMatrix `seq` toIndex ti theMatrix
160 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
162 distanceMap = Map.fromList
164 $ (if strength == Weak then List.reverse else identity)
168 $ (\m -> m `seq` Map.filter (> threshold) m)
169 $ similarities `seq` mat2map similarities
171 doDistanceMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
173 myCooc' = Map.fromList $ HashMap.toList myCooc
174 (ti, _it) = createIndices myCooc'
176 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
178 distanceMap = toIndex ti
181 $ (if strength == Weak then List.reverse else identity)
184 $ HashMap.filter (> threshold)
187 ----------------------------------------------------------
188 -- | From data to Graph
190 type Occurrences = Int
192 data2graph :: ToComId a
193 => Map NgramsTerm Int
194 -> Map (Int, Int) Occurrences
195 -> Map (Int, Int) Double
196 -> Map (Int, Int) Double
199 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
200 , _graph_edges = edges
201 , _graph_metadata = Nothing
205 nodes = map (setCoord ForceAtlas labels bridge)
206 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
207 , node_type = Terms -- or Unknown
208 , node_id = cs (show n)
209 , node_label = unNgramsTerm l
212 , node_attributes = Attributes { clust_default = fromMaybe 0
213 (Map.lookup n community_id_by_node_id)
219 , Set.member n nodesWithScores
222 edges = [ Edge { edge_source = cs (show s)
223 , edge_target = cs (show t)
224 , edge_weight = weight
225 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
226 , edge_id = cs (show i)
228 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
233 community_id_by_node_id = Map.fromList
234 $ map nodeId2comId partitions
236 labels = Map.toList labels'
238 nodesWithScores = Set.fromList
240 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
244 ------------------------------------------------------------------------
246 data Layout = KamadaKawai | ACP | ForceAtlas
249 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
250 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
256 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
257 setCoord l labels m (n,node) = node { node_x_coord = x
261 (x,y) = getCoord l labels m n
267 -> Map (Int, Int) Double
270 getCoord KamadaKawai _ _m _n = undefined -- layout m n
272 getCoord ForceAtlas _ _ n = (sin d, cos d)
276 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
278 $ pcaReduceTo (Dimension 2)
281 to2d :: Vec.Vector Double -> (Double, Double)
284 ds = take 2 $ Vec.toList v
288 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
289 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
293 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
294 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
295 ------------------------------------------------------------------------
297 -- | KamadaKawai Layout
298 -- TODO TEST: check labels, nodeId and coordinates
299 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
300 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
302 coord :: (Map Int (Double,Double))
303 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
304 --p = Layout.defaultLGL
305 p = Layout.kamadaKawai
306 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
308 -----------------------------------------------------------------------------
310 cooc2graph'' :: Ord t => Distance
313 -> Map (Index, Index) Double
314 cooc2graph'' distance threshold myCooc = neighbourMap
316 (ti, _) = createIndices myCooc
317 myCooc' = toIndex ti myCooc
318 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
319 distanceMat = measure distance matCooc
320 neighbourMap = filterByNeighbours threshold
321 $ mat2map distanceMat
324 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
325 filterByNeighbours threshold distanceMap = filteredMap
328 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
329 filteredMap :: Map (Index, Index) Double
330 filteredMap = Map.fromList
333 let selected = List.reverse
337 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
338 in List.take (round threshold) selected