2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness3, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
52 data PartitionMethod = Spinglass | Confluence | Infomap
53 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
54 instance FromJSON PartitionMethod
55 instance ToJSON PartitionMethod
56 instance ToSchema PartitionMethod
57 instance Arbitrary PartitionMethod where
58 arbitrary = elements [ minBound .. maxBound ]
61 -------------------------------------------------------------
62 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
63 -- defaultClustering x = pure $ BAC.defaultClustering x
64 defaultClustering x = spinglass 1 x
66 -------------------------------------------------------------
67 type Threshold = Double
70 cooc2graph' :: Ord t => Similarity
73 -> Map (Index, Index) Double
74 cooc2graph' distance threshold myCooc
75 = Map.filter (> threshold)
79 Conditional -> map2mat Triangle 0 tiSize
80 Distributional -> map2mat Square 0 tiSize
81 $ Map.filter (> 1) myCooc'
84 (ti, _) = createIndices myCooc
86 myCooc' = toIndex ti myCooc
90 -- coocurrences graph computation
91 cooc2graphWith :: PartitionMethod
96 -> HashMap (NgramsTerm, NgramsTerm) Int
98 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
99 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
100 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
101 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
102 -- TODO: change these options, or make them configurable in UI?
105 cooc2graphWith' :: ToComId a
111 -> HashMap (NgramsTerm, NgramsTerm) Int
113 cooc2graphWith' doPartitions multi similarity threshold strength myCooc = do
114 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
115 distanceMap `seq` diag `seq` ti `seq` return ()
118 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
119 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
120 -- printDebug "similarities" similarities
123 partitions <- if (Map.size distanceMap > 0)
124 then doPartitions distanceMap
125 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
126 , "Maybe you should add more Map Terms in your list"
127 , "Tutorial: link todo"
129 length partitions `seq` return ()
132 !confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
133 !bridgeness' = bridgeness3 confluence' distanceMap
134 pure $ data2graph multi ti diag bridgeness' confluence' partitions
138 doSimilarityMap :: Similarity
141 -> HashMap (NgramsTerm, NgramsTerm) Int
142 -> ( Map (Int,Int) Double
143 , Map (Index, Index) Int
144 , Map NgramsTerm Index
146 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
149 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
151 $ HashMap.toList myCooc
153 (ti, _it) = createIndices theMatrix
156 similarities = (\m -> m `seq` m)
157 $ (\m -> m `seq` measure Distributional m)
158 $ (\m -> m `seq` map2mat Square 0 tiSize m)
159 $ theMatrix `seq` toIndex ti theMatrix
161 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
163 distanceMap = Map.fromList
165 $ (if strength == Weak then List.reverse else identity)
169 $ (\m -> m `seq` Map.filter (> threshold) m)
170 $ similarities `seq` mat2map similarities
172 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
174 myCooc' = Map.fromList $ HashMap.toList myCooc
175 (ti, _it) = createIndices myCooc'
176 links = round (let n :: Double = fromIntegral (Map.size ti) in n * (log n)^(2::Int))
177 distanceMap = toIndex ti
180 $ (if strength == Weak then List.reverse else identity)
183 $ HashMap.filter (> threshold)
186 ----------------------------------------------------------
187 -- | From data to Graph
188 type Occurrences = Int
190 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
191 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
192 if HashSet.member t s1
197 data2graph :: ToComId a
199 -> Map NgramsTerm Int
200 -> Map (Int, Int) Occurrences
201 -> Map (Int, Int) Double
202 -> Map (Int, Int) Double
205 data2graph multi labels' occurences bridge conf partitions =
206 Graph { _graph_nodes = nodes
207 , _graph_edges = edges
208 , _graph_metadata = Nothing
213 nodes = map (setCoord ForceAtlas labels bridge)
214 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
215 , node_type = nodeTypeWith multi label
216 , node_id = (cs . show) n
217 , node_label = unNgramsTerm label
221 Attributes { clust_default = fromMaybe 0
222 (Map.lookup n community_id_by_node_id)
227 | (label, n) <- labels
228 , Set.member n toKeep
231 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
233 edges = [ Edge { edge_source = cs (show s)
234 , edge_target = cs (show t)
235 , edge_weight = weight
236 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
237 , edge_id = cs (show i)
239 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
244 community_id_by_node_id = Map.fromList
245 $ map nodeId2comId partitions
247 labels = Map.toList labels'
250 ------------------------------------------------------------------------
252 data Layout = KamadaKawai | ACP | ForceAtlas
255 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
256 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
262 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
263 setCoord l labels m (n,node) = node { node_x_coord = x
267 (x,y) = getCoord l labels m n
273 -> Map (Int, Int) Double
276 getCoord KamadaKawai _ _m _n = undefined -- layout m n
278 getCoord ForceAtlas _ _ n = (sin d, cos d)
282 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
284 $ pcaReduceTo (Dimension 2)
287 to2d :: Vec.Vector Double -> (Double, Double)
290 ds = take 2 $ Vec.toList v
294 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
295 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
299 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
300 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
301 ------------------------------------------------------------------------
303 -- | KamadaKawai Layout
304 -- TODO TEST: check labels, nodeId and coordinates
305 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
306 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
308 coord :: (Map Int (Double,Double))
309 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
310 --p = Layout.defaultLGL
311 p = Layout.kamadaKawai
312 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
314 -----------------------------------------------------------------------------
316 cooc2graph'' :: Ord t => Similarity
319 -> Map (Index, Index) Double
320 cooc2graph'' distance threshold myCooc = neighbourMap
322 (ti, _) = createIndices myCooc
323 myCooc' = toIndex ti myCooc
324 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
325 distanceMat = measure distance matCooc
326 neighbourMap = filterByNeighbours threshold
327 $ mat2map distanceMat
330 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
331 filterByNeighbours threshold distanceMap = filteredMap
334 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
335 filteredMap :: Map (Index, Index) Double
336 filteredMap = Map.fromList
339 let selected = List.reverse
343 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
344 in List.take (round threshold) selected