2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Text as Text
44 import qualified Data.Vector.Storable as Vec
45 import qualified Graph.BAC.ProxemyOptim as BAC
46 import qualified IGraph as Igraph
47 import qualified IGraph.Algorithms.Layout as Layout
50 data PartitionMethod = Spinglass | Confluence
51 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
52 instance FromJSON PartitionMethod
53 instance ToJSON PartitionMethod
54 instance ToSchema PartitionMethod
55 instance Arbitrary PartitionMethod where
56 arbitrary = elements [ minBound .. maxBound ]
59 -------------------------------------------------------------
60 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
61 -- defaultClustering x = pure $ BAC.defaultClustering x
62 defaultClustering x = spinglass 1 x
64 -------------------------------------------------------------
65 type Threshold = Double
68 cooc2graph' :: Ord t => Distance
71 -> Map (Index, Index) Double
72 cooc2graph' distance threshold myCooc
73 = Map.filter (> threshold)
77 Conditional -> map2mat Triangle 0 tiSize
78 Distributional -> map2mat Square 0 tiSize
79 $ Map.filter (> 1) myCooc'
82 (ti, _) = createIndices myCooc
84 myCooc' = toIndex ti myCooc
88 -- coocurrences graph computation
89 cooc2graphWith :: PartitionMethod
92 -> HashMap (NgramsTerm, NgramsTerm) Int
94 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
95 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
98 cooc2graphWith' :: ToComId a
102 -> HashMap (NgramsTerm, NgramsTerm) Int
104 cooc2graphWith' doPartitions distance threshold myCooc = do
106 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
109 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
110 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
111 -- printDebug "similarities" similarities
114 partitions <- if (Map.size distanceMap > 0)
115 then doPartitions distanceMap
116 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
117 , "Maybe you should add more Map Terms in your list"
118 , "Tutorial: link todo"
125 (as, bs) = List.unzip $ Map.keys distanceMap
126 n' = Set.size $ Set.fromList $ as <> bs
127 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
128 confluence' = confluence (Map.keys bridgeness') 3 True False
130 pure $ data2graph ti diag bridgeness' confluence' partitions
133 doDistanceMap :: Distance
135 -> HashMap (NgramsTerm, NgramsTerm) Int
136 -> ( Map (Int,Int) Double
137 , Map (Index, Index) Int
138 , Map NgramsTerm Index
140 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
143 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
145 $ HashMap.toList myCooc
147 (ti, _it) = createIndices theMatrix
150 similarities = measure Distributional
151 $ map2mat Square 0 tiSize
152 $ toIndex ti theMatrix
154 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
156 distanceMap = Map.fromList
162 $ Map.filter (> threshold)
163 $ mat2map similarities
165 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
167 myCooc' = Map.fromList $ HashMap.toList myCooc
168 (ti, _it) = createIndices myCooc'
170 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
172 distanceMap = toIndex ti
177 $ HashMap.filter (> threshold)
180 ----------------------------------------------------------
181 -- | From data to Graph
183 type Occurrences = Int
185 data2graph :: ToComId a
186 => Map NgramsTerm Int
187 -> Map (Int, Int) Occurrences
188 -> Map (Int, Int) Double
189 -> Map (Int, Int) Double
192 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
193 , _graph_edges = edges
194 , _graph_metadata = Nothing
198 nodes = map (setCoord ForceAtlas labels bridge)
199 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
200 , node_type = Terms -- or Unknown
201 , node_id = cs (show n)
202 , node_label = unNgramsTerm l
205 , node_attributes = Attributes { clust_default = fromMaybe 0
206 (Map.lookup n community_id_by_node_id)
212 , Set.member n nodesWithScores
215 edges = [ Edge { edge_source = cs (show s)
216 , edge_target = cs (show t)
217 , edge_weight = weight
218 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
219 , edge_id = cs (show i)
221 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
226 community_id_by_node_id = Map.fromList
227 $ map nodeId2comId partitions
229 labels = Map.toList labels'
231 nodesWithScores = Set.fromList
233 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
237 ------------------------------------------------------------------------
239 data Layout = KamadaKawai | ACP | ForceAtlas
242 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
243 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
249 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
250 setCoord l labels m (n,node) = node { node_x_coord = x
254 (x,y) = getCoord l labels m n
260 -> Map (Int, Int) Double
263 getCoord KamadaKawai _ _m _n = undefined -- layout m n
265 getCoord ForceAtlas _ _ n = (sin d, cos d)
269 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
271 $ pcaReduceTo (Dimension 2)
274 to2d :: Vec.Vector Double -> (Double, Double)
277 ds = take 2 $ Vec.toList v
281 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
282 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
286 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
287 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
288 ------------------------------------------------------------------------
290 -- | KamadaKawai Layout
291 -- TODO TEST: check labels, nodeId and coordinates
292 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
293 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
295 coord :: (Map Int (Double,Double))
296 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
297 --p = Layout.defaultLGL
298 p = Layout.kamadaKawai
299 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
301 -----------------------------------------------------------------------------
303 cooc2graph'' :: Ord t => Distance
306 -> Map (Index, Index) Double
307 cooc2graph'' distance threshold myCooc = neighbourMap
309 (ti, _) = createIndices myCooc
310 myCooc' = toIndex ti myCooc
311 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
312 distanceMat = measure distance matCooc
313 neighbourMap = filterByNeighbours threshold
314 $ mat2map distanceMat
317 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
318 filterByNeighbours threshold distanceMap = filteredMap
321 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
322 filteredMap :: Map (Index, Index) Double
323 filteredMap = Map.fromList
326 let selected = List.reverse
330 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
331 in List.take (round threshold) selected