2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.Text as Text
45 import qualified Data.Vector.Storable as Vec
46 import qualified Graph.BAC.ProxemyOptim as BAC
47 import qualified IGraph as Igraph
48 import qualified IGraph.Algorithms.Layout as Layout
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
60 -------------------------------------------------------------
61 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
62 -- defaultClustering x = pure $ BAC.defaultClustering x
63 defaultClustering x = spinglass 1 x
65 -------------------------------------------------------------
66 type Threshold = Double
69 cooc2graph' :: Ord t => Distance
72 -> Map (Index, Index) Double
73 cooc2graph' distance threshold myCooc
74 = Map.filter (> threshold)
78 Conditional -> map2mat Triangle 0 tiSize
79 Distributional -> map2mat Square 0 tiSize
80 $ Map.filter (> 1) myCooc'
83 (ti, _) = createIndices myCooc
85 myCooc' = toIndex ti myCooc
89 -- coocurrences graph computation
90 cooc2graphWith :: PartitionMethod
93 -> HashMap (NgramsTerm, NgramsTerm) Int
95 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
96 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
97 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
98 -- TODO: change these options, or make them configurable in UI?
101 cooc2graphWith' :: ToComId a
105 -> HashMap (NgramsTerm, NgramsTerm) Int
107 cooc2graphWith' doPartitions distance threshold myCooc = do
109 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
112 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
113 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
114 -- printDebug "similarities" similarities
117 partitions <- if (Map.size distanceMap > 0)
118 then doPartitions distanceMap
119 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
120 , "Maybe you should add more Map Terms in your list"
121 , "Tutorial: link todo"
128 (as, bs) = List.unzip $ Map.keys distanceMap
129 n' = Set.size $ Set.fromList $ as <> bs
130 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
131 confluence' = confluence (Map.keys bridgeness') 3 True False
133 pure $ data2graph ti diag bridgeness' confluence' partitions
136 doDistanceMap :: Distance
138 -> HashMap (NgramsTerm, NgramsTerm) Int
139 -> ( Map (Int,Int) Double
140 , Map (Index, Index) Int
141 , Map NgramsTerm Index
143 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
146 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
148 $ HashMap.toList myCooc
150 (ti, _it) = createIndices theMatrix
153 similarities = measure Distributional
154 $ map2mat Square 0 tiSize
155 $ toIndex ti theMatrix
157 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
159 distanceMap = Map.fromList
165 $ Map.filter (> threshold)
166 $ mat2map similarities
168 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
170 myCooc' = Map.fromList $ HashMap.toList myCooc
171 (ti, _it) = createIndices myCooc'
173 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
175 distanceMap = toIndex ti
180 $ HashMap.filter (> threshold)
183 ----------------------------------------------------------
184 -- | From data to Graph
186 type Occurrences = Int
188 data2graph :: ToComId a
189 => Map NgramsTerm Int
190 -> Map (Int, Int) Occurrences
191 -> Map (Int, Int) Double
192 -> Map (Int, Int) Double
195 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
196 , _graph_edges = edges
197 , _graph_metadata = Nothing
201 nodes = map (setCoord ForceAtlas labels bridge)
202 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
203 , node_type = Terms -- or Unknown
204 , node_id = cs (show n)
205 , node_label = unNgramsTerm l
208 , node_attributes = Attributes { clust_default = fromMaybe 0
209 (Map.lookup n community_id_by_node_id)
215 , Set.member n nodesWithScores
218 edges = [ Edge { edge_source = cs (show s)
219 , edge_target = cs (show t)
220 , edge_weight = weight
221 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
222 , edge_id = cs (show i)
224 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
229 community_id_by_node_id = Map.fromList
230 $ map nodeId2comId partitions
232 labels = Map.toList labels'
234 nodesWithScores = Set.fromList
236 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
240 ------------------------------------------------------------------------
242 data Layout = KamadaKawai | ACP | ForceAtlas
245 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
246 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
252 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
253 setCoord l labels m (n,node) = node { node_x_coord = x
257 (x,y) = getCoord l labels m n
263 -> Map (Int, Int) Double
266 getCoord KamadaKawai _ _m _n = undefined -- layout m n
268 getCoord ForceAtlas _ _ n = (sin d, cos d)
272 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
274 $ pcaReduceTo (Dimension 2)
277 to2d :: Vec.Vector Double -> (Double, Double)
280 ds = take 2 $ Vec.toList v
284 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
285 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
289 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
290 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
291 ------------------------------------------------------------------------
293 -- | KamadaKawai Layout
294 -- TODO TEST: check labels, nodeId and coordinates
295 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
296 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
298 coord :: (Map Int (Double,Double))
299 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
300 --p = Layout.defaultLGL
301 p = Layout.kamadaKawai
302 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
304 -----------------------------------------------------------------------------
306 cooc2graph'' :: Ord t => Distance
309 -> Map (Index, Index) Double
310 cooc2graph'' distance threshold myCooc = neighbourMap
312 (ti, _) = createIndices myCooc
313 myCooc' = toIndex ti myCooc
314 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
315 distanceMat = measure distance matCooc
316 neighbourMap = filterByNeighbours threshold
317 $ mat2map distanceMat
320 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
321 filterByNeighbours threshold distanceMap = filteredMap
324 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
325 filteredMap :: Map (Index, Index) Double
326 filteredMap = Map.fromList
329 let selected = List.reverse
333 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
334 in List.take (round threshold) selected