2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, {-recursiveClustering,-} recursiveClustering', setNodes2clusterNodes)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
59 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
60 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
61 instance FromJSON BridgenessMethod
62 instance ToJSON BridgenessMethod
63 instance ToSchema BridgenessMethod
64 instance Arbitrary BridgenessMethod where
65 arbitrary = elements [ minBound .. maxBound ]
68 -------------------------------------------------------------
69 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
70 -- defaultClustering x = pure $ BAC.defaultClustering x
71 defaultClustering x = spinglass 1 x
73 -------------------------------------------------------------
74 type Threshold = Double
77 cooc2graph' :: Ord t => Similarity
80 -> Map (Index, Index) Double
81 cooc2graph' distance threshold myCooc
82 = Map.filter (> threshold)
86 Conditional -> map2mat Triangle 0 tiSize
87 Distributional -> map2mat Square 0 tiSize
88 $ Map.filter (> 1) myCooc'
91 (ti, _) = createIndices myCooc
93 myCooc' = toIndex ti myCooc
97 -- coocurrences graph computation
98 cooc2graphWith :: PartitionMethod
104 -> HashMap (NgramsTerm, NgramsTerm) Int
106 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
107 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
108 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
109 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
110 -- TODO: change these options, or make them configurable in UI?
112 cooc2graphWith' :: Partitions
118 -> HashMap (NgramsTerm, NgramsTerm) Int
120 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold strength myCooc = do
121 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
122 distanceMap `seq` diag `seq` ti `seq` return ()
124 partitions <- if (Map.size distanceMap > 0)
125 then recursiveClustering' (spinglass' 1) distanceMap
126 else panic $ Text.unlines [ "I can not compute the graph you request"
127 , "because either the quantity of documents"
128 , "or the quantity of terms"
130 , "Solution: add more either Documents or Map Terms to your analysis. "
131 , "Follow the available tutorials on the Training EcoSystems. "
132 , "Ask your co-users of GarganText how to have access to it."
134 length partitions `seq` return ()
137 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
138 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap
140 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
143 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Distributional threshold strength myCooc = do
144 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
145 distanceMap `seq` diag `seq` ti `seq` return ()
147 partitions <- if (Map.size distanceMap > 0)
148 then recursiveClustering (spinglass 1) distanceMap
149 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
150 , "Maybe you should add more Map Terms in your list"
153 length partitions `seq` return ()
156 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
157 !bridgeness' = bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
159 pure $ data2graph multi ti diag bridgeness' confluence' partitions
166 doSimilarityMap :: Similarity
169 -> HashMap (NgramsTerm, NgramsTerm) Int
170 -> ( Map (Int,Int) Double
171 , Map (Index, Index) Int
172 , Map NgramsTerm Index
175 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
177 myCooc' = Map.fromList $ HashMap.toList myCooc
179 (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
181 $ HashMap.toList myCooc
183 (ti, _it) = createIndices theMatrix
186 similarities = (\m -> m `seq` m)
187 $ (\m -> m `seq` measure Conditional m)
188 $ (\m -> m `seq` map2mat Square 0 tiSize m)
189 $ theMatrix `seq` toIndex ti theMatrix
191 links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
192 distanceMap = Map.fromList
194 $ (if strength == Weak then List.reverse else identity)
197 $ Map.filter (> threshold)
198 -- $ conditional myCooc
199 $ similarities `seq` mat2map similarities
201 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
204 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
206 $ HashMap.toList myCooc
208 (ti, _it) = createIndices theMatrix
211 similarities = (\m -> m `seq` m)
212 $ (\m -> m `seq` measure Distributional m)
213 $ (\m -> m `seq` map2mat Square 0 tiSize m)
214 $ theMatrix `seq` toIndex ti theMatrix
216 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
218 distanceMap = Map.fromList
220 $ (if strength == Weak then List.reverse else identity)
224 $ (\m -> m `seq` Map.filter (> threshold) m)
225 $ similarities `seq` mat2map similarities
227 ----------------------------------------------------------
228 -- | From data to Graph
229 type Occurrences = Int
231 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
232 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
233 if HashSet.member t s1
237 data2graph :: MultiPartite
238 -> Map NgramsTerm Int
239 -> Map (Int, Int) Occurrences
240 -> Map (Int, Int) Double
241 -> Map (Int, Int) Double
244 data2graph multi labels' occurences bridge conf partitions =
245 Graph { _graph_nodes = nodes
246 , _graph_edges = edges
247 , _graph_metadata = Nothing
252 nodes = map (setCoord ForceAtlas labels bridge)
253 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
254 , node_type = nodeTypeWith multi label
255 , node_id = (cs . show) n
256 , node_label = unNgramsTerm label
260 Attributes { clust_default = fromMaybe 0
261 (Map.lookup n community_id_by_node_id)
266 | (label, n) <- labels
267 , Set.member n toKeep
270 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
272 edges = [ Edge { edge_source = cs (show s)
273 , edge_hidden = Nothing
274 , edge_target = cs (show t)
275 , edge_weight = weight
276 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
277 , edge_id = cs (show i)
279 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
284 community_id_by_node_id = Map.fromList
285 $ map nodeId2comId partitions
287 labels = Map.toList labels'
290 ------------------------------------------------------------------------
292 data Layout = KamadaKawai | ACP | ForceAtlas
295 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
296 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
302 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
303 setCoord l labels m (n,node) = node { node_x_coord = x
307 (x,y) = getCoord l labels m n
313 -> Map (Int, Int) Double
316 getCoord KamadaKawai _ _m _n = undefined -- layout m n
318 getCoord ForceAtlas _ _ n = (sin d, cos d)
322 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
324 $ pcaReduceTo (Dimension 2)
327 to2d :: Vec.Vector Double -> (Double, Double)
330 ds = take 2 $ Vec.toList v
334 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
335 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
339 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
340 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
341 ------------------------------------------------------------------------
343 -- | KamadaKawai Layout
344 -- TODO TEST: check labels, nodeId and coordinates
345 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
346 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
348 coord :: (Map Int (Double,Double))
349 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
350 --p = Layout.defaultLGL
351 p = Layout.kamadaKawai
352 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
354 -----------------------------------------------------------------------------
356 cooc2graph'' :: Ord t => Similarity
359 -> Map (Index, Index) Double
360 cooc2graph'' distance threshold myCooc = neighbourMap
362 (ti, _) = createIndices myCooc
363 myCooc' = toIndex ti myCooc
364 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
365 distanceMat = measure distance matCooc
366 neighbourMap = filterByNeighbours threshold
367 $ mat2map distanceMat
370 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
371 filterByNeighbours threshold distanceMap = filteredMap
374 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
375 filteredMap :: Map (Index, Index) Double
376 filteredMap = Map.fromList
379 let selected = List.reverse
383 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
384 in List.take (round threshold) selected