2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
52 data PartitionMethod = Spinglass | Confluence | Infomap
53 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
54 instance FromJSON PartitionMethod
55 instance ToJSON PartitionMethod
56 instance ToSchema PartitionMethod
57 instance Arbitrary PartitionMethod where
58 arbitrary = elements [ minBound .. maxBound ]
61 -------------------------------------------------------------
62 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
63 -- defaultClustering x = pure $ BAC.defaultClustering x
64 defaultClustering x = spinglass 1 x
66 -------------------------------------------------------------
67 type Threshold = Double
70 cooc2graph' :: Ord t => Similarity
73 -> Map (Index, Index) Double
74 cooc2graph' distance threshold myCooc
75 = Map.filter (> threshold)
79 Conditional -> map2mat Triangle 0 tiSize
80 Distributional -> map2mat Square 0 tiSize
81 $ Map.filter (> 1) myCooc'
84 (ti, _) = createIndices myCooc
86 myCooc' = toIndex ti myCooc
90 -- coocurrences graph computation
91 cooc2graphWith :: PartitionMethod
96 -> HashMap (NgramsTerm, NgramsTerm) Int
98 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
99 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
100 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
101 -- TODO: change these options, or make them configurable in UI?
104 cooc2graphWith' :: ToComId a
110 -> HashMap (NgramsTerm, NgramsTerm) Int
112 cooc2graphWith' doPartitions multi similarity threshold strength myCooc = do
113 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
114 distanceMap `seq` diag `seq` ti `seq` return ()
117 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
118 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
119 -- printDebug "similarities" similarities
122 partitions <- if (Map.size distanceMap > 0)
123 then doPartitions distanceMap
124 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
125 , "Maybe you should add more Map Terms in your list"
126 , "Tutorial: link todo"
128 length partitions `seq` return ()
133 (as, bs) = List.unzip $ Map.keys distanceMap
134 n' = Set.size $ Set.fromList $ as <> bs
135 !bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
136 !confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
137 pure $ data2graph multi ti diag bridgeness' confluence' partitions
141 doSimilarityMap :: Similarity
144 -> HashMap (NgramsTerm, NgramsTerm) Int
145 -> ( Map (Int,Int) Double
146 , Map (Index, Index) Int
147 , Map NgramsTerm Index
149 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
152 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
154 $ HashMap.toList myCooc
156 (ti, _it) = createIndices theMatrix
159 similarities = (\m -> m `seq` m)
160 $ (\m -> m `seq` measure Distributional m)
161 $ (\m -> m `seq` map2mat Square 0 tiSize m)
162 $ theMatrix `seq` toIndex ti theMatrix
164 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
166 distanceMap = Map.fromList
168 $ (if strength == Weak then List.reverse else identity)
172 $ (\m -> m `seq` Map.filter (> threshold) m)
173 $ similarities `seq` mat2map similarities
175 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
177 myCooc' = Map.fromList $ HashMap.toList myCooc
178 (ti, _it) = createIndices myCooc'
179 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
180 distanceMap = toIndex ti
183 $ (if strength == Weak then List.reverse else identity)
186 $ HashMap.filter (> threshold)
189 ----------------------------------------------------------
190 -- | From data to Graph
191 type Occurrences = Int
193 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
194 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
195 if HashSet.member t s1
200 data2graph :: ToComId a
202 -> Map NgramsTerm Int
203 -> Map (Int, Int) Occurrences
204 -> Map (Int, Int) Double
205 -> Map (Int, Int) Double
208 data2graph multi labels' occurences bridge conf partitions =
209 Graph { _graph_nodes = nodes
210 , _graph_edges = edges
211 , _graph_metadata = Nothing
216 nodes = map (setCoord ForceAtlas labels bridge)
217 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
218 , node_type = nodeTypeWith multi label
219 , node_id = (cs . show) n
220 , node_label = unNgramsTerm label
224 Attributes { clust_default = fromMaybe 0
225 (Map.lookup n community_id_by_node_id)
230 | (label, n) <- labels
231 , Set.member n toKeep
234 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
236 edges = [ Edge { edge_source = cs (show s)
237 , edge_target = cs (show t)
238 , edge_weight = weight
239 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
240 , edge_id = cs (show i)
242 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
247 community_id_by_node_id = Map.fromList
248 $ map nodeId2comId partitions
250 labels = Map.toList labels'
253 ------------------------------------------------------------------------
255 data Layout = KamadaKawai | ACP | ForceAtlas
258 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
259 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
265 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
266 setCoord l labels m (n,node) = node { node_x_coord = x
270 (x,y) = getCoord l labels m n
276 -> Map (Int, Int) Double
279 getCoord KamadaKawai _ _m _n = undefined -- layout m n
281 getCoord ForceAtlas _ _ n = (sin d, cos d)
285 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
287 $ pcaReduceTo (Dimension 2)
290 to2d :: Vec.Vector Double -> (Double, Double)
293 ds = take 2 $ Vec.toList v
297 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
298 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
302 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
303 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
304 ------------------------------------------------------------------------
306 -- | KamadaKawai Layout
307 -- TODO TEST: check labels, nodeId and coordinates
308 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
309 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
311 coord :: (Map Int (Double,Double))
312 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
313 --p = Layout.defaultLGL
314 p = Layout.kamadaKawai
315 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
317 -----------------------------------------------------------------------------
319 cooc2graph'' :: Ord t => Similarity
322 -> Map (Index, Index) Double
323 cooc2graph'' distance threshold myCooc = neighbourMap
325 (ti, _) = createIndices myCooc
326 myCooc' = toIndex ti myCooc
327 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
328 distanceMat = measure distance matCooc
329 neighbourMap = filterByNeighbours threshold
330 $ mat2map distanceMat
333 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
334 filterByNeighbours threshold distanceMap = filteredMap
337 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
338 filteredMap :: Map (Index, Index) Double
339 filteredMap = Map.fromList
342 let selected = List.reverse
346 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
347 in List.take (round threshold) selected