2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.Text as Text
45 import qualified Data.Vector.Storable as Vec
46 import qualified Graph.BAC.ProxemyOptim as BAC
47 import qualified IGraph as Igraph
48 import qualified IGraph.Algorithms.Layout as Layout
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
60 -------------------------------------------------------------
61 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
62 -- defaultClustering x = pure $ BAC.defaultClustering x
63 defaultClustering x = spinglass 1 x
65 -------------------------------------------------------------
66 type Threshold = Double
69 cooc2graph' :: Ord t => Distance
72 -> Map (Index, Index) Double
73 cooc2graph' distance threshold myCooc
74 = Map.filter (> threshold)
78 Conditional -> map2mat Triangle 0 tiSize
79 Distributional -> map2mat Square 0 tiSize
80 $ Map.filter (> 1) myCooc'
83 (ti, _) = createIndices myCooc
85 myCooc' = toIndex ti myCooc
89 -- coocurrences graph computation
90 cooc2graphWith :: PartitionMethod
93 -> HashMap (NgramsTerm, NgramsTerm) Int
95 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
96 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
97 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
98 -- TODO: change these options, or make them configurable in UI?
101 cooc2graphWith' :: ToComId a
105 -> HashMap (NgramsTerm, NgramsTerm) Int
107 cooc2graphWith' doPartitions distance threshold myCooc = do
109 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
112 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
113 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
114 -- printDebug "similarities" similarities
117 partitions <- if (Map.size distanceMap > 0)
118 then doPartitions distanceMap
119 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
120 , "Maybe you should add more Map Terms in your list"
121 , "Tutorial: link todo"
128 (as, bs) = List.unzip $ Map.keys distanceMap
129 n' = Set.size $ Set.fromList $ as <> bs
130 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
131 confluence' = confluence (Map.keys bridgeness') 3 True False
133 pure $ data2graph ti diag bridgeness' confluence' partitions
136 doDistanceMap :: Distance
138 -> HashMap (NgramsTerm, NgramsTerm) Int
139 -> ( Map (Int,Int) Double
140 , Map (Index, Index) Int
141 , Map NgramsTerm Index
143 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
146 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
148 $ HashMap.toList myCooc
150 (ti, _it) = createIndices theMatrix
153 similarities = measure Distributional
154 $ map2mat Square 0 tiSize
155 $ toIndex ti theMatrix
157 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
159 distanceMap = Map.fromList
165 $ Map.filter (> threshold)
166 $ mat2map similarities
168 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
170 myCooc' = Map.fromList $ HashMap.toList myCooc
171 (ti, _it) = createIndices myCooc'
173 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
175 distanceMap = toIndex ti
181 $ HashMap.filter (> threshold)
184 ----------------------------------------------------------
185 -- | From data to Graph
187 type Occurrences = Int
189 data2graph :: ToComId a
190 => Map NgramsTerm Int
191 -> Map (Int, Int) Occurrences
192 -> Map (Int, Int) Double
193 -> Map (Int, Int) Double
196 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
197 , _graph_edges = edges
198 , _graph_metadata = Nothing
202 nodes = map (setCoord ForceAtlas labels bridge)
203 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
204 , node_type = Terms -- or Unknown
205 , node_id = cs (show n)
206 , node_label = unNgramsTerm l
209 , node_attributes = Attributes { clust_default = fromMaybe 0
210 (Map.lookup n community_id_by_node_id)
216 , Set.member n nodesWithScores
219 edges = [ Edge { edge_source = cs (show s)
220 , edge_target = cs (show t)
221 , edge_weight = weight
222 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
223 , edge_id = cs (show i)
225 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
230 community_id_by_node_id = Map.fromList
231 $ map nodeId2comId partitions
233 labels = Map.toList labels'
235 nodesWithScores = Set.fromList
237 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
241 ------------------------------------------------------------------------
243 data Layout = KamadaKawai | ACP | ForceAtlas
246 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
247 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
253 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
254 setCoord l labels m (n,node) = node { node_x_coord = x
258 (x,y) = getCoord l labels m n
264 -> Map (Int, Int) Double
267 getCoord KamadaKawai _ _m _n = undefined -- layout m n
269 getCoord ForceAtlas _ _ n = (sin d, cos d)
273 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
275 $ pcaReduceTo (Dimension 2)
278 to2d :: Vec.Vector Double -> (Double, Double)
281 ds = take 2 $ Vec.toList v
285 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
286 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
290 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
291 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
292 ------------------------------------------------------------------------
294 -- | KamadaKawai Layout
295 -- TODO TEST: check labels, nodeId and coordinates
296 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
297 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
299 coord :: (Map Int (Double,Double))
300 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
301 --p = Layout.defaultLGL
302 p = Layout.kamadaKawai
303 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
305 -----------------------------------------------------------------------------
307 cooc2graph'' :: Ord t => Distance
310 -> Map (Index, Index) Double
311 cooc2graph'' distance threshold myCooc = neighbourMap
313 (ti, _) = createIndices myCooc
314 myCooc' = toIndex ti myCooc
315 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
316 distanceMat = measure distance matCooc
317 neighbourMap = filterByNeighbours threshold
318 $ mat2map distanceMat
321 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
322 filterByNeighbours threshold distanceMap = filteredMap
325 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
326 filteredMap :: Map (Index, Index) Double
327 filteredMap = Map.fromList
330 let selected = List.reverse
334 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
335 in List.take (round threshold) selected