2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Vector.Storable as Vec
44 import qualified Graph.BAC.ProxemyOptim as BAC
45 import qualified IGraph as Igraph
46 import qualified IGraph.Algorithms.Layout as Layout
49 data PartitionMethod = Spinglass | Confluence
50 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
51 instance FromJSON PartitionMethod
52 instance ToJSON PartitionMethod
53 instance ToSchema PartitionMethod
54 instance Arbitrary PartitionMethod where
55 arbitrary = elements [ minBound .. maxBound ]
58 -------------------------------------------------------------
59 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
60 -- defaultClustering x = pure $ BAC.defaultClustering x
61 defaultClustering x = spinglass 1 x
63 -------------------------------------------------------------
64 type Threshold = Double
67 cooc2graph' :: Ord t => Distance
70 -> Map (Index, Index) Double
71 cooc2graph' distance threshold myCooc
72 = Map.filter (> threshold)
76 Conditional -> map2mat Triangle 0 tiSize
77 Distributional -> map2mat Square 0 tiSize
78 $ Map.filter (> 1) myCooc'
81 (ti, _) = createIndices myCooc
83 myCooc' = toIndex ti myCooc
87 -- coocurrences graph computation
88 cooc2graphWith :: PartitionMethod
91 -> HashMap (NgramsTerm, NgramsTerm) Int
93 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
94 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
97 cooc2graphWith' :: ToComId a
101 -> HashMap (NgramsTerm, NgramsTerm) Int
103 cooc2graphWith' doPartitions distance threshold myCooc = do
105 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
108 saveAsFileDebug "/tmp/distanceMap" distanceMap
109 saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
110 -- printDebug "similarities" similarities
113 partitions <- if (Map.size distanceMap > 0)
114 then doPartitions distanceMap
115 else panic "Text.Flow: DistanceMap is empty"
121 (as, bs) = List.unzip $ Map.keys distanceMap
122 n' = Set.size $ Set.fromList $ as <> bs
123 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
124 confluence' = confluence (Map.keys bridgeness') 3 True False
126 pure $ data2graph ti diag bridgeness' confluence' partitions
129 doDistanceMap :: Distance
131 -> HashMap (NgramsTerm, NgramsTerm) Int
132 -> ( Map (Int,Int) Double
133 , Map (Index, Index) Int
134 , Map NgramsTerm Index
136 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
139 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
141 $ HashMap.toList myCooc
143 (ti, _it) = createIndices theMatrix
147 matCooc = case distance of -- Shape of the Matrix
148 Conditional -> map2mat Triangle 0 tiSize
149 Distributional -> map2mat Square 0 tiSize
150 $ toIndex ti theMatrix
151 similarities = measure distance matCooc
154 similarities = measure Distributional
155 $ map2mat Square 0 tiSize
156 $ toIndex ti theMatrix
158 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
160 distanceMap = Map.fromList
166 $ Map.filter (> threshold)
167 $ mat2map similarities
169 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
171 myCooc' = Map.fromList $ HashMap.toList myCooc
172 (ti, _it) = createIndices myCooc'
173 -- tiSize = Map.size ti
175 -- links = round (let n :: Double = fromIntegral tiSize in n * log n)
177 distanceMap = toIndex ti
182 $ HashMap.filter (> threshold)
185 ----------------------------------------------------------
186 -- | From data to Graph
188 type Occurrences = Int
190 data2graph :: ToComId a
191 => Map NgramsTerm Int
192 -> Map (Int, Int) Occurrences
193 -> Map (Int, Int) Double
194 -> Map (Int, Int) Double
197 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
198 , _graph_edges = edges
199 , _graph_metadata = Nothing
203 nodes = map (setCoord ForceAtlas labels bridge)
204 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
205 , node_type = Terms -- or Unknown
206 , node_id = cs (show n)
207 , node_label = unNgramsTerm l
210 , node_attributes = Attributes { clust_default = fromMaybe 0
211 (Map.lookup n community_id_by_node_id)
217 , Set.member n nodesWithScores
220 edges = [ Edge { edge_source = cs (show s)
221 , edge_target = cs (show t)
222 , edge_weight = weight
223 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
224 , edge_id = cs (show i)
226 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
231 community_id_by_node_id = Map.fromList
232 $ map nodeId2comId partitions
234 labels = Map.toList labels'
236 nodesWithScores = Set.fromList
238 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
242 ------------------------------------------------------------------------
244 data Layout = KamadaKawai | ACP | ForceAtlas
247 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
248 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
254 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
255 setCoord l labels m (n,node) = node { node_x_coord = x
259 (x,y) = getCoord l labels m n
265 -> Map (Int, Int) Double
268 getCoord KamadaKawai _ _m _n = undefined -- layout m n
270 getCoord ForceAtlas _ _ n = (sin d, cos d)
274 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
276 $ pcaReduceTo (Dimension 2)
279 to2d :: Vec.Vector Double -> (Double, Double)
282 ds = take 2 $ Vec.toList v
286 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
287 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
291 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
292 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
293 ------------------------------------------------------------------------
295 -- | KamadaKawai Layout
296 -- TODO TEST: check labels, nodeId and coordinates
297 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
298 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
300 coord :: (Map Int (Double,Double))
301 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
302 --p = Layout.defaultLGL
303 p = Layout.kamadaKawai
304 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
306 -----------------------------------------------------------------------------
308 cooc2graph'' :: Ord t => Distance
311 -> Map (Index, Index) Double
312 cooc2graph'' distance threshold myCooc = neighbourMap
314 (ti, _) = createIndices myCooc
315 myCooc' = toIndex ti myCooc
316 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
317 distanceMat = measure distance matCooc
318 neighbourMap = filterByNeighbours threshold
319 $ mat2map distanceMat
322 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
323 filterByNeighbours threshold distanceMap = filteredMap
326 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
327 filteredMap :: Map (Index, Index) Double
328 filteredMap = Map.fromList
331 let selected = List.reverse
335 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
336 in List.take (round threshold) selected