2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Vector.Storable as Vec
44 import qualified Graph.BAC.ProxemyOptim as BAC
45 import qualified IGraph as Igraph
46 import qualified IGraph.Algorithms.Layout as Layout
49 data PartitionMethod = Spinglass | Confluence
50 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
51 instance FromJSON PartitionMethod
52 instance ToJSON PartitionMethod
53 instance ToSchema PartitionMethod
54 instance Arbitrary PartitionMethod where
55 arbitrary = elements [ minBound .. maxBound ]
58 -------------------------------------------------------------
59 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
60 -- defaultClustering x = pure $ BAC.defaultClustering x
61 defaultClustering x = spinglass 1 x
63 -------------------------------------------------------------
64 type Threshold = Double
67 cooc2graph' :: Ord t => Distance
70 -> Map (Index, Index) Double
71 cooc2graph' distance threshold myCooc
72 = Map.filter (> threshold)
76 Conditional -> map2mat Triangle 0 tiSize
77 Distributional -> map2mat Square 0 tiSize
78 $ Map.filter (> 1) myCooc'
81 (ti, _) = createIndices myCooc
83 myCooc' = toIndex ti myCooc
87 -- coocurrences graph computation
88 cooc2graphWith :: PartitionMethod
91 -> HashMap (NgramsTerm, NgramsTerm) Int
93 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
94 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
97 cooc2graphWith' :: ToComId a
101 -> HashMap (NgramsTerm, NgramsTerm) Int
103 cooc2graphWith' doPartitions distance threshold myCooc = do
105 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
108 saveAsFileDebug "debug/distanceMap" distanceMap
109 printDebug "similarities" similarities
112 partitions <- if (Map.size distanceMap > 0)
113 then doPartitions distanceMap
114 else panic "Text.Flow: DistanceMap is empty"
120 (as, bs) = List.unzip $ Map.keys distanceMap
121 n' = Set.size $ Set.fromList $ as <> bs
122 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
123 confluence' = confluence (Map.keys bridgeness') 3 True False
125 pure $ data2graph ti diag bridgeness' confluence' partitions
128 doDistanceMap :: Distance
130 -> HashMap (NgramsTerm, NgramsTerm) Int
131 -> ( Map (Int,Int) Double
132 , Map (Index, Index) Int
133 , Map NgramsTerm Index
135 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
138 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
140 $ HashMap.toList myCooc
142 (ti, _it) = createIndices theMatrix
146 matCooc = case distance of -- Shape of the Matrix
147 Conditional -> map2mat Triangle 0 tiSize
148 Distributional -> map2mat Square 0 tiSize
149 $ toIndex ti theMatrix
150 similarities = measure distance matCooc
153 similarities = measure Distributional
154 $ map2mat Square 0 tiSize
155 $ toIndex ti theMatrix
157 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
159 distanceMap = Map.fromList
165 $ Map.filter (> threshold)
166 $ mat2map similarities
168 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
170 myCooc' = Map.fromList $ HashMap.toList myCooc
171 (ti, _it) = createIndices myCooc'
174 links = round (let n :: Double = fromIntegral tiSize in n * log n)
176 distanceMap = toIndex ti
181 $ HashMap.filter (> threshold)
184 ----------------------------------------------------------
185 -- | From data to Graph
187 type Occurrences = Int
189 data2graph :: ToComId a
190 => Map NgramsTerm Int
191 -> Map (Int, Int) Occurrences
192 -> Map (Int, Int) Double
193 -> Map (Int, Int) Double
196 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
197 , _graph_edges = edges
198 , _graph_metadata = Nothing
202 nodes = map (setCoord ForceAtlas labels bridge)
203 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
204 , node_type = Terms -- or Unknown
205 , node_id = cs (show n)
206 , node_label = unNgramsTerm l
209 , node_attributes = Attributes { clust_default = fromMaybe 0
210 (Map.lookup n community_id_by_node_id)
212 , node_children = [] }
215 , Set.member n nodesWithScores
218 edges = [ Edge { edge_source = cs (show s)
219 , edge_target = cs (show t)
220 , edge_weight = weight
221 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
222 , edge_id = cs (show i)
224 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
229 community_id_by_node_id = Map.fromList
230 $ map nodeId2comId partitions
232 labels = Map.toList labels'
234 nodesWithScores = Set.fromList
236 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
240 ------------------------------------------------------------------------
242 data Layout = KamadaKawai | ACP | ForceAtlas
245 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
246 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
252 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
253 setCoord l labels m (n,node) = node { node_x_coord = x
257 (x,y) = getCoord l labels m n
263 -> Map (Int, Int) Double
266 getCoord KamadaKawai _ _m _n = undefined -- layout m n
268 getCoord ForceAtlas _ _ n = (sin d, cos d)
272 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
274 $ pcaReduceTo (Dimension 2)
277 to2d :: Vec.Vector Double -> (Double, Double)
280 ds = take 2 $ Vec.toList v
284 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
285 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
289 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
290 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
291 ------------------------------------------------------------------------
293 -- | KamadaKawai Layout
294 -- TODO TEST: check labels, nodeId and coordinates
295 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
296 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
298 coord :: (Map Int (Double,Double))
299 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
300 --p = Layout.defaultLGL
301 p = Layout.kamadaKawai
302 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
304 -----------------------------------------------------------------------------
306 cooc2graph'' :: Ord t => Distance
309 -> Map (Index, Index) Double
310 cooc2graph'' distance threshold myCooc = neighbourMap
312 (ti, _) = createIndices myCooc
313 myCooc' = toIndex ti myCooc
314 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
315 distanceMat = measure distance matCooc
316 neighbourMap = filterByNeighbours threshold
317 $ mat2map distanceMat
320 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
321 filterByNeighbours threshold distanceMap = filteredMap
324 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
325 filteredMap :: Map (Index, Index) Double
326 filteredMap = Map.fromList
329 let selected = List.reverse
333 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
334 in List.take (round threshold) selected