2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness3, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
52 data PartitionMethod = Spinglass | Confluence | Infomap
53 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
54 instance FromJSON PartitionMethod
55 instance ToJSON PartitionMethod
56 instance ToSchema PartitionMethod
57 instance Arbitrary PartitionMethod where
58 arbitrary = elements [ minBound .. maxBound ]
61 -------------------------------------------------------------
62 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
63 -- defaultClustering x = pure $ BAC.defaultClustering x
64 defaultClustering x = spinglass 1 x
66 -------------------------------------------------------------
67 type Threshold = Double
70 cooc2graph' :: Ord t => Similarity
73 -> Map (Index, Index) Double
74 cooc2graph' distance threshold myCooc
75 = Map.filter (> threshold)
79 Conditional -> map2mat Triangle 0 tiSize
80 Distributional -> map2mat Square 0 tiSize
81 $ Map.filter (> 1) myCooc'
84 (ti, _) = createIndices myCooc
86 myCooc' = toIndex ti myCooc
90 -- coocurrences graph computation
91 cooc2graphWith :: PartitionMethod
96 -> HashMap (NgramsTerm, NgramsTerm) Int
98 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
99 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
100 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
101 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
102 -- TODO: change these options, or make them configurable in UI?
105 cooc2graphWith' :: ToComId a
111 -> HashMap (NgramsTerm, NgramsTerm) Int
113 cooc2graphWith' doPartitions multi similarity threshold strength myCooc = do
114 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
115 distanceMap `seq` diag `seq` ti `seq` return ()
118 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
119 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
120 -- printDebug "similarities" similarities
123 partitions <- if (Map.size distanceMap > 0)
124 then doPartitions distanceMap
125 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
126 , "Maybe you should add more Map Terms in your list"
127 , "Tutorial: link todo"
129 length partitions `seq` return ()
132 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
133 !bridgeness' = bridgeness3 similarity confluence' distanceMap
135 pure $ data2graph multi ti diag bridgeness' confluence' partitions
139 doSimilarityMap :: Similarity
142 -> HashMap (NgramsTerm, NgramsTerm) Int
143 -> ( Map (Int,Int) Double
144 , Map (Index, Index) Int
145 , Map NgramsTerm Index
147 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
150 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
152 $ HashMap.toList myCooc
154 (ti, _it) = createIndices theMatrix
157 similarities = (\m -> m `seq` m)
158 $ (\m -> m `seq` measure Distributional m)
159 $ (\m -> m `seq` map2mat Square 0 tiSize m)
160 $ theMatrix `seq` toIndex ti theMatrix
162 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
164 distanceMap = Map.fromList
166 $ (if strength == Weak then List.reverse else identity)
170 $ (\m -> m `seq` Map.filter (> threshold) m)
171 $ similarities `seq` mat2map similarities
173 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
175 myCooc' = Map.fromList $ HashMap.toList myCooc
176 (ti, _it) = createIndices myCooc'
177 links = round (let n :: Double = fromIntegral (Map.size ti) in n * (log n)^(2::Int))
178 distanceMap = toIndex ti
181 $ (if strength == Weak then List.reverse else identity)
184 $ HashMap.filter (> threshold)
187 ----------------------------------------------------------
188 -- | From data to Graph
189 type Occurrences = Int
191 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
192 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
193 if HashSet.member t s1
198 data2graph :: ToComId a
200 -> Map NgramsTerm Int
201 -> Map (Int, Int) Occurrences
202 -> Map (Int, Int) Double
203 -> Map (Int, Int) Double
206 data2graph multi labels' occurences bridge conf partitions =
207 Graph { _graph_nodes = nodes
208 , _graph_edges = edges
209 , _graph_metadata = Nothing
214 nodes = map (setCoord ForceAtlas labels bridge)
215 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
216 , node_type = nodeTypeWith multi label
217 , node_id = (cs . show) n
218 , node_label = unNgramsTerm label
222 Attributes { clust_default = fromMaybe 0
223 (Map.lookup n community_id_by_node_id)
228 | (label, n) <- labels
229 , Set.member n toKeep
232 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
234 edges = [ Edge { edge_source = cs (show s)
235 , edge_target = cs (show t)
236 , edge_weight = weight
237 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
238 , edge_id = cs (show i)
240 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
245 community_id_by_node_id = Map.fromList
246 $ map nodeId2comId partitions
248 labels = Map.toList labels'
251 ------------------------------------------------------------------------
253 data Layout = KamadaKawai | ACP | ForceAtlas
256 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
257 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
263 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
264 setCoord l labels m (n,node) = node { node_x_coord = x
268 (x,y) = getCoord l labels m n
274 -> Map (Int, Int) Double
277 getCoord KamadaKawai _ _m _n = undefined -- layout m n
279 getCoord ForceAtlas _ _ n = (sin d, cos d)
283 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
285 $ pcaReduceTo (Dimension 2)
288 to2d :: Vec.Vector Double -> (Double, Double)
291 ds = take 2 $ Vec.toList v
295 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
296 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
300 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
301 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
302 ------------------------------------------------------------------------
304 -- | KamadaKawai Layout
305 -- TODO TEST: check labels, nodeId and coordinates
306 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
307 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
309 coord :: (Map Int (Double,Double))
310 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
311 --p = Layout.defaultLGL
312 p = Layout.kamadaKawai
313 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
315 -----------------------------------------------------------------------------
317 cooc2graph'' :: Ord t => Similarity
320 -> Map (Index, Index) Double
321 cooc2graph'' distance threshold myCooc = neighbourMap
323 (ti, _) = createIndices myCooc
324 myCooc' = toIndex ti myCooc
325 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
326 distanceMat = measure distance matCooc
327 neighbourMap = filterByNeighbours threshold
328 $ mat2map distanceMat
331 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
332 filterByNeighbours threshold distanceMap = filteredMap
335 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
336 filteredMap :: Map (Index, Index) Double
337 filteredMap = Map.fromList
340 let selected = List.reverse
344 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
345 in List.take (round threshold) selected