2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
20 import Data.HashMap.Strict (HashMap)
22 import Data.Maybe (fromMaybe)
23 import Data.Swagger hiding (items)
24 import GHC.Float (sin, cos)
25 import GHC.Generics (Generic)
26 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
27 import Gargantext.Core.Methods.Distances (Distance(..), measure)
28 import Gargantext.Core.Methods.Distances.Conditional (conditional)
29 -- import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
30 import Gargantext.Core.Statistics
31 import Gargantext.Core.Viz.Graph
32 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
33 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
34 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
35 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
36 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
37 import Gargantext.Prelude
38 import Graph.Types (ClusterNode)
39 import IGraph.Random -- (Gen(..))
40 import Test.QuickCheck (elements)
41 import Test.QuickCheck.Arbitrary
42 import qualified Data.HashMap.Strict as HashMap
43 import qualified Data.List as List
44 import qualified Data.Map as Map
45 import qualified Data.Set as Set
46 import qualified Data.Text as Text
47 import qualified Data.Vector.Storable as Vec
48 import qualified Graph.BAC.ProxemyOptim as BAC
49 import qualified IGraph as Igraph
50 import qualified IGraph.Algorithms.Layout as Layout
53 data PartitionMethod = Spinglass | Confluence | Infomap
54 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
55 instance FromJSON PartitionMethod
56 instance ToJSON PartitionMethod
57 instance ToSchema PartitionMethod
58 instance Arbitrary PartitionMethod where
59 arbitrary = elements [ minBound .. maxBound ]
62 -------------------------------------------------------------
63 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
64 -- defaultClustering x = pure $ BAC.defaultClustering x
65 defaultClustering x = spinglass 1 x
67 -------------------------------------------------------------
68 type Threshold = Double
71 cooc2graph' :: Ord t => Distance
74 -> Map (Index, Index) Double
75 cooc2graph' distance threshold myCooc
76 = Map.filter (> threshold)
80 Conditional -> map2mat Triangle 0 tiSize
81 Distributional -> map2mat Square 0 tiSize
82 $ Map.filter (> 1) myCooc'
85 (ti, _) = createIndices myCooc
87 myCooc' = toIndex ti myCooc
91 -- coocurrences graph computation
92 cooc2graphWith :: PartitionMethod
96 -> HashMap (NgramsTerm, NgramsTerm) Int
98 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
99 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
100 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
101 -- TODO: change these options, or make them configurable in UI?
104 cooc2graphWith' :: ToComId a
109 -> HashMap (NgramsTerm, NgramsTerm) Int
111 cooc2graphWith' doPartitions distance threshold strength myCooc = do
112 let (distanceMap, diag, ti) = doDistanceMap distance threshold strength myCooc
113 distanceMap `seq` trace "distanceMap OK" diag `seq` trace "diag OK" ti `seq` printDebug "ti done" ()
116 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
117 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
118 -- printDebug "similarities" similarities
121 partitions <- if (Map.size distanceMap > 0)
122 then doPartitions distanceMap
123 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
124 , "Maybe you should add more Map Terms in your list"
125 , "Tutorial: link todo"
127 partitions `seq` printDebug "partitions done" ()
132 (as, bs) = List.unzip $ Map.keys distanceMap
133 n' = Set.size $ Set.fromList $ as <> bs
134 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
135 confluence' = Map.empty -- BAC.computeConfluences 3 (Map.keys bridgeness') True
136 -- confluence (Map.keys bridgeness') 3 True False
137 seq bridgeness' $ printDebug "bridgeness OK" ()
138 seq confluence' $ printDebug "confluence OK" ()
139 pure $ data2graph ti diag bridgeness' confluence' partitions
143 doDistanceMap :: Distance
146 -> HashMap (NgramsTerm, NgramsTerm) Int
147 -> ( Map (Int,Int) Double
148 , Map (Index, Index) Int
149 , Map NgramsTerm Index
151 doDistanceMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
154 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
156 $ HashMap.toList myCooc
158 (ti, _it) = createIndices theMatrix
161 similarities = (\m -> m `seq` trace "measure done" m)
162 $ (\m -> m `seq` trace "map2mat done" (measure Distributional m))
163 $ (\m -> m `seq` trace "toIndex done" (map2mat Square 0 tiSize m))
164 $ theMatrix `seq` trace "theMatrix done" (toIndex ti theMatrix)
166 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
168 distanceMap = Map.fromList . trace "fromList" identity
170 $ (if strength == Weak then List.reverse else identity)
174 $ (\m -> m `seq` trace "map2map done" (Map.filter (> threshold) m))
175 $ similarities `seq` mat2map (trace "similarities done" similarities)
177 doDistanceMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
179 myCooc' = Map.fromList $ HashMap.toList myCooc
180 (ti, _it) = createIndices myCooc'
182 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
184 distanceMap = toIndex ti
187 $ (if strength == Weak then List.reverse else identity)
190 $ HashMap.filter (> threshold)
193 ----------------------------------------------------------
194 -- | From data to Graph
196 type Occurrences = Int
198 data2graph :: ToComId a
199 => Map NgramsTerm Int
200 -> Map (Int, Int) Occurrences
201 -> Map (Int, Int) Double
202 -> Map (Int, Int) Double
205 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
206 , _graph_edges = edges
207 , _graph_metadata = Nothing
211 nodes = map (setCoord ForceAtlas labels bridge)
212 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
213 , node_type = Terms -- or Unknown
214 , node_id = cs (show n)
215 , node_label = unNgramsTerm l
218 , node_attributes = Attributes { clust_default = fromMaybe 0
219 (Map.lookup n community_id_by_node_id)
225 , Set.member n nodesWithScores
228 edges = [ Edge { edge_source = cs (show s)
229 , edge_target = cs (show t)
230 , edge_weight = weight
231 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
232 , edge_id = cs (show i)
234 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
239 community_id_by_node_id = Map.fromList
240 $ map nodeId2comId partitions
242 labels = Map.toList labels'
244 nodesWithScores = Set.fromList
246 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
250 ------------------------------------------------------------------------
252 data Layout = KamadaKawai | ACP | ForceAtlas
255 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
256 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
262 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
263 setCoord l labels m (n,node) = node { node_x_coord = x
267 (x,y) = getCoord l labels m n
273 -> Map (Int, Int) Double
276 getCoord KamadaKawai _ _m _n = undefined -- layout m n
278 getCoord ForceAtlas _ _ n = (sin d, cos d)
282 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
284 $ pcaReduceTo (Dimension 2)
287 to2d :: Vec.Vector Double -> (Double, Double)
290 ds = take 2 $ Vec.toList v
294 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
295 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
299 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
300 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
301 ------------------------------------------------------------------------
303 -- | KamadaKawai Layout
304 -- TODO TEST: check labels, nodeId and coordinates
305 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
306 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
308 coord :: (Map Int (Double,Double))
309 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
310 --p = Layout.defaultLGL
311 p = Layout.kamadaKawai
312 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
314 -----------------------------------------------------------------------------
316 cooc2graph'' :: Ord t => Distance
319 -> Map (Index, Index) Double
320 cooc2graph'' distance threshold myCooc = neighbourMap
322 (ti, _) = createIndices myCooc
323 myCooc' = toIndex ti myCooc
324 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
325 distanceMat = measure distance matCooc
326 neighbourMap = filterByNeighbours threshold
327 $ mat2map distanceMat
330 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
331 filterByNeighbours threshold distanceMap = filteredMap
334 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
335 filteredMap :: Map (Index, Index) Double
336 filteredMap = Map.fromList
339 let selected = List.reverse
343 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
344 in List.take (round threshold) selected