2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
20 import Data.HashMap.Strict (HashMap)
22 import Data.Maybe (fromMaybe)
23 import Data.Swagger hiding (items)
24 import GHC.Float (sin, cos)
25 import GHC.Generics (Generic)
26 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
27 import Gargantext.Core.Methods.Distances (Distance(..), measure)
28 import Gargantext.Core.Methods.Distances.Conditional (conditional)
29 -- import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
30 import Gargantext.Core.Statistics
31 import Gargantext.Core.Viz.Graph
32 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
33 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
34 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
35 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
36 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
37 import Gargantext.Prelude
38 import Graph.Types (ClusterNode)
39 import IGraph.Random -- (Gen(..))
40 import Test.QuickCheck (elements)
41 import Test.QuickCheck.Arbitrary
42 import qualified Data.HashMap.Strict as HashMap
43 import qualified Data.List as List
44 import qualified Data.Map as Map
45 import qualified Data.Set as Set
46 import qualified Data.Text as Text
47 import qualified Data.Vector.Storable as Vec
48 import qualified Graph.BAC.ProxemyOptim as BAC
49 import qualified IGraph as Igraph
50 import qualified IGraph.Algorithms.Layout as Layout
53 data PartitionMethod = Spinglass | Confluence | Infomap
54 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
55 instance FromJSON PartitionMethod
56 instance ToJSON PartitionMethod
57 instance ToSchema PartitionMethod
58 instance Arbitrary PartitionMethod where
59 arbitrary = elements [ minBound .. maxBound ]
62 -------------------------------------------------------------
63 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
64 -- defaultClustering x = pure $ BAC.defaultClustering x
65 defaultClustering x = spinglass 1 x
67 -------------------------------------------------------------
68 type Threshold = Double
71 cooc2graph' :: Ord t => Distance
74 -> Map (Index, Index) Double
75 cooc2graph' distance threshold myCooc
76 = Map.filter (> threshold)
80 Conditional -> map2mat Triangle 0 tiSize
81 Distributional -> map2mat Square 0 tiSize
82 $ Map.filter (> 1) myCooc'
85 (ti, _) = createIndices myCooc
87 myCooc' = toIndex ti myCooc
91 -- coocurrences graph computation
92 cooc2graphWith :: PartitionMethod
95 -> HashMap (NgramsTerm, NgramsTerm) Int
97 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
98 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
99 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
100 -- TODO: change these options, or make them configurable in UI?
103 cooc2graphWith' :: ToComId a
107 -> HashMap (NgramsTerm, NgramsTerm) Int
109 cooc2graphWith' doPartitions distance threshold myCooc = do
110 let (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
111 distanceMap `seq` trace "distanceMap OK" diag `seq` trace "diag OK" ti `seq` printDebug "ti done" ()
114 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
115 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
116 -- printDebug "similarities" similarities
119 partitions <- if (Map.size distanceMap > 0)
120 then doPartitions distanceMap
121 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
122 , "Maybe you should add more Map Terms in your list"
123 , "Tutorial: link todo"
125 partitions `seq` printDebug "partitions done" ()
130 (as, bs) = List.unzip $ Map.keys distanceMap
131 n' = Set.size $ Set.fromList $ as <> bs
132 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
133 confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
134 -- confluence (Map.keys bridgeness') 3 True False
135 seq bridgeness' $ printDebug "bridgeness OK" ()
136 seq confluence' $ printDebug "confluence OK" ()
137 pure $ data2graph ti diag bridgeness' confluence' partitions
140 doDistanceMap :: Distance
142 -> HashMap (NgramsTerm, NgramsTerm) Int
143 -> ( Map (Int,Int) Double
144 , Map (Index, Index) Int
145 , Map NgramsTerm Index
147 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
150 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
152 $ HashMap.toList myCooc
154 (ti, _it) = createIndices theMatrix
157 similarities = (\m -> m `seq` trace "measure done" m)
158 $ (\m -> m `seq` trace "map2mat done" (measure Distributional m))
159 $ (\m -> m `seq` trace "toIndex done" (map2mat Square 0 tiSize m))
160 $ theMatrix `seq` trace "theMatrix done" (toIndex ti theMatrix)
162 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
164 distanceMap = Map.fromList . trace "fromList" identity
170 $ (\m -> m `seq` trace "map2map done" (Map.filter (> threshold) m))
171 $ similarities `seq` mat2map (trace "similarities done" similarities)
173 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
175 myCooc' = Map.fromList $ HashMap.toList myCooc
176 (ti, _it) = createIndices myCooc'
178 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
180 distanceMap = toIndex ti
186 $ HashMap.filter (> threshold)
189 ----------------------------------------------------------
190 -- | From data to Graph
192 type Occurrences = Int
194 data2graph :: ToComId a
195 => Map NgramsTerm Int
196 -> Map (Int, Int) Occurrences
197 -> Map (Int, Int) Double
198 -> Map (Int, Int) Double
201 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
202 , _graph_edges = edges
203 , _graph_metadata = Nothing
207 nodes = map (setCoord ForceAtlas labels bridge)
208 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
209 , node_type = Terms -- or Unknown
210 , node_id = cs (show n)
211 , node_label = unNgramsTerm l
214 , node_attributes = Attributes { clust_default = fromMaybe 0
215 (Map.lookup n community_id_by_node_id)
221 , Set.member n nodesWithScores
224 edges = [ Edge { edge_source = cs (show s)
225 , edge_target = cs (show t)
226 , edge_weight = weight
227 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
228 , edge_id = cs (show i)
230 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
235 community_id_by_node_id = Map.fromList
236 $ map nodeId2comId partitions
238 labels = Map.toList labels'
240 nodesWithScores = Set.fromList
242 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
246 ------------------------------------------------------------------------
248 data Layout = KamadaKawai | ACP | ForceAtlas
251 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
252 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
258 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
259 setCoord l labels m (n,node) = node { node_x_coord = x
263 (x,y) = getCoord l labels m n
269 -> Map (Int, Int) Double
272 getCoord KamadaKawai _ _m _n = undefined -- layout m n
274 getCoord ForceAtlas _ _ n = (sin d, cos d)
278 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
280 $ pcaReduceTo (Dimension 2)
283 to2d :: Vec.Vector Double -> (Double, Double)
286 ds = take 2 $ Vec.toList v
290 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
291 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
295 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
296 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
297 ------------------------------------------------------------------------
299 -- | KamadaKawai Layout
300 -- TODO TEST: check labels, nodeId and coordinates
301 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
302 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
304 coord :: (Map Int (Double,Double))
305 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
306 --p = Layout.defaultLGL
307 p = Layout.kamadaKawai
308 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
310 -----------------------------------------------------------------------------
312 cooc2graph'' :: Ord t => Distance
315 -> Map (Index, Index) Double
316 cooc2graph'' distance threshold myCooc = neighbourMap
318 (ti, _) = createIndices myCooc
319 myCooc' = toIndex ti myCooc
320 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
321 distanceMat = measure distance matCooc
322 neighbourMap = filterByNeighbours threshold
323 $ mat2map distanceMat
326 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
327 filterByNeighbours threshold distanceMap = filteredMap
330 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
331 filteredMap :: Map (Index, Index) Double
332 filteredMap = Map.fromList
335 let selected = List.reverse
339 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
340 in List.take (round threshold) selected