2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
20 import Data.HashMap.Strict (HashMap)
22 import Data.Maybe (fromMaybe)
23 import Data.Swagger hiding (items)
24 import GHC.Float (sin, cos)
25 import GHC.Generics (Generic)
26 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
27 import Gargantext.Core.Methods.Distances (Distance(..), measure)
28 import Gargantext.Core.Methods.Distances.Conditional (conditional)
29 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
30 import Gargantext.Core.Statistics
31 import Gargantext.Core.Viz.Graph
32 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
33 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
34 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
35 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
36 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
37 import Gargantext.Prelude
38 import Graph.Types (ClusterNode)
39 import IGraph.Random -- (Gen(..))
40 import Test.QuickCheck (elements)
41 import Test.QuickCheck.Arbitrary
42 import qualified Data.HashMap.Strict as HashMap
43 import qualified Data.List as List
44 import qualified Data.Map as Map
45 import qualified Data.Set as Set
46 import qualified Data.Text as Text
47 import qualified Data.Vector.Storable as Vec
48 import qualified Graph.BAC.ProxemyOptim as BAC
49 import qualified IGraph as Igraph
50 import qualified IGraph.Algorithms.Layout as Layout
53 data PartitionMethod = Spinglass | Confluence | Infomap
54 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
55 instance FromJSON PartitionMethod
56 instance ToJSON PartitionMethod
57 instance ToSchema PartitionMethod
58 instance Arbitrary PartitionMethod where
59 arbitrary = elements [ minBound .. maxBound ]
62 -------------------------------------------------------------
63 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
64 -- defaultClustering x = pure $ BAC.defaultClustering x
65 defaultClustering x = spinglass 1 x
67 -------------------------------------------------------------
68 type Threshold = Double
71 cooc2graph' :: Ord t => Distance
74 -> Map (Index, Index) Double
75 cooc2graph' distance threshold myCooc
76 = Map.filter (> threshold)
80 Conditional -> map2mat Triangle 0 tiSize
81 Distributional -> map2mat Square 0 tiSize
82 $ Map.filter (> 1) myCooc'
85 (ti, _) = createIndices myCooc
87 myCooc' = toIndex ti myCooc
91 -- coocurrences graph computation
92 cooc2graphWith :: PartitionMethod
95 -> HashMap (NgramsTerm, NgramsTerm) Int
97 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
98 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
99 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
100 -- TODO: change these options, or make them configurable in UI?
103 cooc2graphWith' :: ToComId a
107 -> HashMap (NgramsTerm, NgramsTerm) Int
109 cooc2graphWith' doPartitions distance threshold myCooc = do
110 let (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
111 distanceMap `seq` trace "distanceMap OK" diag `seq` trace "diag OK" ti `seq` printDebug "ti done" ()
114 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
115 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
116 -- printDebug "similarities" similarities
119 partitions <- if (Map.size distanceMap > 0)
120 then doPartitions distanceMap
121 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
122 , "Maybe you should add more Map Terms in your list"
123 , "Tutorial: link todo"
125 partitions `seq` printDebug "partitions done" ()
130 (as, bs) = List.unzip $ Map.keys distanceMap
131 n' = Set.size $ Set.fromList $ as <> bs
132 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
133 confluence' = confluence (Map.keys bridgeness') 3 True False
134 seq bridgeness' $ printDebug "bridgeness OK" ()
135 seq confluence' $ printDebug "confluence OK" ()
136 pure $ data2graph ti diag bridgeness' confluence' partitions
139 doDistanceMap :: Distance
141 -> HashMap (NgramsTerm, NgramsTerm) Int
142 -> ( Map (Int,Int) Double
143 , Map (Index, Index) Int
144 , Map NgramsTerm Index
146 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
149 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
151 $ HashMap.toList myCooc
153 (ti, _it) = createIndices theMatrix
156 similarities = (\m -> m `seq` trace "measure done" m)
157 $ (\m -> m `seq` trace "map2mat done" (measure Distributional m))
158 $ (\m -> m `seq` trace "toIndex done" (map2mat Square 0 tiSize m))
159 $ theMatrix `seq` trace "theMatrix done" (toIndex ti theMatrix)
161 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
163 distanceMap = Map.fromList . trace "fromList" identity
169 $ (\m -> m `seq` trace "map2map done" (Map.filter (> threshold) m))
170 $ similarities `seq` mat2map (trace "similarities done" similarities)
172 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
174 myCooc' = Map.fromList $ HashMap.toList myCooc
175 (ti, _it) = createIndices myCooc'
177 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
179 distanceMap = toIndex ti
185 $ HashMap.filter (> threshold)
188 ----------------------------------------------------------
189 -- | From data to Graph
191 type Occurrences = Int
193 data2graph :: ToComId a
194 => Map NgramsTerm Int
195 -> Map (Int, Int) Occurrences
196 -> Map (Int, Int) Double
197 -> Map (Int, Int) Double
200 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
201 , _graph_edges = edges
202 , _graph_metadata = Nothing
206 nodes = map (setCoord ForceAtlas labels bridge)
207 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
208 , node_type = Terms -- or Unknown
209 , node_id = cs (show n)
210 , node_label = unNgramsTerm l
213 , node_attributes = Attributes { clust_default = fromMaybe 0
214 (Map.lookup n community_id_by_node_id)
220 , Set.member n nodesWithScores
223 edges = [ Edge { edge_source = cs (show s)
224 , edge_target = cs (show t)
225 , edge_weight = weight
226 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
227 , edge_id = cs (show i)
229 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
234 community_id_by_node_id = Map.fromList
235 $ map nodeId2comId partitions
237 labels = Map.toList labels'
239 nodesWithScores = Set.fromList
241 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
245 ------------------------------------------------------------------------
247 data Layout = KamadaKawai | ACP | ForceAtlas
250 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
251 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
257 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
258 setCoord l labels m (n,node) = node { node_x_coord = x
262 (x,y) = getCoord l labels m n
268 -> Map (Int, Int) Double
271 getCoord KamadaKawai _ _m _n = undefined -- layout m n
273 getCoord ForceAtlas _ _ n = (sin d, cos d)
277 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
279 $ pcaReduceTo (Dimension 2)
282 to2d :: Vec.Vector Double -> (Double, Double)
285 ds = take 2 $ Vec.toList v
289 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
290 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
294 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
295 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
296 ------------------------------------------------------------------------
298 -- | KamadaKawai Layout
299 -- TODO TEST: check labels, nodeId and coordinates
300 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
301 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
303 coord :: (Map Int (Double,Double))
304 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
305 --p = Layout.defaultLGL
306 p = Layout.kamadaKawai
307 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
309 -----------------------------------------------------------------------------
311 cooc2graph'' :: Ord t => Distance
314 -> Map (Index, Index) Double
315 cooc2graph'' distance threshold myCooc = neighbourMap
317 (ti, _) = createIndices myCooc
318 myCooc' = toIndex ti myCooc
319 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
320 distanceMat = measure distance matCooc
321 neighbourMap = filterByNeighbours threshold
322 $ mat2map distanceMat
325 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
326 filterByNeighbours threshold distanceMap = filteredMap
329 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
330 filteredMap :: Map (Index, Index) Double
331 filteredMap = Map.fromList
334 let selected = List.reverse
338 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
339 in List.take (round threshold) selected