2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId)
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
52 data PartitionMethod = Spinglass | Confluence | Infomap
53 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
54 instance FromJSON PartitionMethod
55 instance ToJSON PartitionMethod
56 instance ToSchema PartitionMethod
57 instance Arbitrary PartitionMethod where
58 arbitrary = elements [ minBound .. maxBound ]
60 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
61 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
62 instance FromJSON BridgenessMethod
63 instance ToJSON BridgenessMethod
64 instance ToSchema BridgenessMethod
65 instance Arbitrary BridgenessMethod where
66 arbitrary = elements [ minBound .. maxBound ]
69 -------------------------------------------------------------
70 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
71 -- defaultClustering x = pure $ BAC.defaultClustering x
72 defaultClustering x = spinglass 1 x
74 -------------------------------------------------------------
75 type Threshold = Double
78 cooc2graph' :: Ord t => Similarity
81 -> Map (Index, Index) Double
82 cooc2graph' distance threshold myCooc
83 = Map.filter (> threshold)
87 Conditional -> map2mat Triangle 0 tiSize
88 Distributional -> map2mat Square 0 tiSize
89 $ Map.filter (> 1) myCooc'
92 (ti, _) = createIndices myCooc
94 myCooc' = toIndex ti myCooc
98 -- coocurrences graph computation
99 cooc2graphWith :: PartitionMethod
105 -> HashMap (NgramsTerm, NgramsTerm) Int
107 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
108 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
109 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
110 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
111 -- TODO: change these options, or make them configurable in UI?
114 cooc2graphWith' :: Partitions
120 -> HashMap (NgramsTerm, NgramsTerm) Int
122 cooc2graphWith' doPartitions bridgenessMethod multi similarity threshold strength myCooc = do
123 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
124 distanceMap `seq` diag `seq` ti `seq` return ()
127 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
128 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
129 -- printDebug "similarities" similarities
132 partitions <- if (Map.size distanceMap > 0)
133 then doPartitions distanceMap
134 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
135 , "Maybe you should add more Map Terms in your list"
136 , "Tutorial: link todo"
138 length partitions `seq` return ()
141 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
142 !bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
143 then bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
144 else bridgeness (Bridgeness_Advanced similarity confluence') distanceMap
146 pure $ data2graph multi ti diag bridgeness' confluence' partitions
150 doSimilarityMap :: Similarity
153 -> HashMap (NgramsTerm, NgramsTerm) Int
154 -> ( Map (Int,Int) Double
155 , Map (Index, Index) Int
156 , Map NgramsTerm Index
158 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
161 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
163 $ HashMap.toList myCooc
165 (ti, _it) = createIndices theMatrix
168 similarities = (\m -> m `seq` m)
169 $ (\m -> m `seq` measure Distributional m)
170 $ (\m -> m `seq` map2mat Square 0 tiSize m)
171 $ theMatrix `seq` toIndex ti theMatrix
173 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
175 distanceMap = Map.fromList
177 $ (if strength == Weak then List.reverse else identity)
181 $ (\m -> m `seq` Map.filter (> threshold) m)
182 $ similarities `seq` mat2map similarities
184 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
186 myCooc' = Map.fromList $ HashMap.toList myCooc
187 (ti, _it) = createIndices myCooc'
188 links = round (let n :: Double = fromIntegral (Map.size ti) in n * (log n)^(2::Int))
189 distanceMap = toIndex ti
192 $ (if strength == Weak then List.reverse else identity)
195 $ HashMap.filter (> threshold)
198 ----------------------------------------------------------
199 -- | From data to Graph
200 type Occurrences = Int
202 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
203 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
204 if HashSet.member t s1
209 data2graph :: MultiPartite
210 -> Map NgramsTerm Int
211 -> Map (Int, Int) Occurrences
212 -> Map (Int, Int) Double
213 -> Map (Int, Int) Double
216 data2graph multi labels' occurences bridge conf partitions =
217 Graph { _graph_nodes = nodes
218 , _graph_edges = edges
219 , _graph_metadata = Nothing
224 nodes = map (setCoord ForceAtlas labels bridge)
225 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
226 , node_type = nodeTypeWith multi label
227 , node_id = (cs . show) n
228 , node_label = unNgramsTerm label
232 Attributes { clust_default = fromMaybe 0
233 (Map.lookup n community_id_by_node_id)
238 | (label, n) <- labels
239 , Set.member n toKeep
242 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
244 edges = [ Edge { edge_source = cs (show s)
245 , edge_target = cs (show t)
246 , edge_weight = weight
247 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
248 , edge_id = cs (show i)
250 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
255 community_id_by_node_id = Map.fromList
256 $ map nodeId2comId partitions
258 labels = Map.toList labels'
261 ------------------------------------------------------------------------
263 data Layout = KamadaKawai | ACP | ForceAtlas
266 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
267 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
273 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
274 setCoord l labels m (n,node) = node { node_x_coord = x
278 (x,y) = getCoord l labels m n
284 -> Map (Int, Int) Double
287 getCoord KamadaKawai _ _m _n = undefined -- layout m n
289 getCoord ForceAtlas _ _ n = (sin d, cos d)
293 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
295 $ pcaReduceTo (Dimension 2)
298 to2d :: Vec.Vector Double -> (Double, Double)
301 ds = take 2 $ Vec.toList v
305 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
306 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
310 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
311 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
312 ------------------------------------------------------------------------
314 -- | KamadaKawai Layout
315 -- TODO TEST: check labels, nodeId and coordinates
316 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
317 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
319 coord :: (Map Int (Double,Double))
320 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
321 --p = Layout.defaultLGL
322 p = Layout.kamadaKawai
323 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
325 -----------------------------------------------------------------------------
327 cooc2graph'' :: Ord t => Similarity
330 -> Map (Index, Index) Double
331 cooc2graph'' distance threshold myCooc = neighbourMap
333 (ti, _) = createIndices myCooc
334 myCooc' = toIndex ti myCooc
335 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
336 distanceMat = measure distance matCooc
337 neighbourMap = filterByNeighbours threshold
338 $ mat2map distanceMat
341 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
342 filterByNeighbours threshold distanceMap = filteredMap
345 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
346 filteredMap :: Map (Index, Index) Double
347 filteredMap = Map.fromList
350 let selected = List.reverse
354 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
355 in List.take (round threshold) selected