2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
14 module Gargantext.Core.Viz.Graph.Tools
18 import Data.HashMap.Strict (HashMap)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
52 data PartitionMethod = Spinglass | Confluence | Infomap
53 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
54 instance FromJSON PartitionMethod
55 instance ToJSON PartitionMethod
56 instance ToSchema PartitionMethod
57 instance Arbitrary PartitionMethod where
58 arbitrary = elements [ minBound .. maxBound ]
61 -------------------------------------------------------------
62 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
63 -- defaultClustering x = pure $ BAC.defaultClustering x
64 defaultClustering x = spinglass 1 x
66 -------------------------------------------------------------
67 type Threshold = Double
70 cooc2graph' :: Ord t => Similarity
73 -> Map (Index, Index) Double
74 cooc2graph' distance threshold myCooc
75 = Map.filter (> threshold)
79 Conditional -> map2mat Triangle 0 tiSize
80 Distributional -> map2mat Square 0 tiSize
81 $ Map.filter (> 1) myCooc'
84 (ti, _) = createIndices myCooc
86 myCooc' = toIndex ti myCooc
90 -- coocurrences graph computation
91 cooc2graphWith :: PartitionMethod
96 -> HashMap (NgramsTerm, NgramsTerm) Int
98 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
99 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
100 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
101 -- TODO: change these options, or make them configurable in UI?
104 cooc2graphWith' :: ToComId a
110 -> HashMap (NgramsTerm, NgramsTerm) Int
112 cooc2graphWith' doPartitions multi similarity threshold strength myCooc = do
113 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
114 distanceMap `seq` diag `seq` ti `seq` return ()
117 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
118 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
119 -- printDebug "similarities" similarities
122 partitions <- if (Map.size distanceMap > 0)
123 then doPartitions distanceMap
124 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
125 , "Maybe you should add more Map Terms in your list"
126 , "Tutorial: link todo"
128 length partitions `seq` return ()
133 (as, bs) = List.unzip $ Map.keys distanceMap
134 n' = Set.size $ Set.fromList $ as <> bs
135 !bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
136 !confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
137 pure $ data2graph multi ti diag bridgeness' confluence' partitions
141 doSimilarityMap :: Similarity
144 -> HashMap (NgramsTerm, NgramsTerm) Int
145 -> ( Map (Int,Int) Double
146 , Map (Index, Index) Int
147 , Map NgramsTerm Index
149 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
152 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
154 $ HashMap.toList myCooc
156 (ti, _it) = createIndices theMatrix
159 similarities = (\m -> m `seq` m)
160 $ (\m -> m `seq` measure Distributional m)
161 $ (\m -> m `seq` map2mat Square 0 tiSize m)
162 $ theMatrix `seq` toIndex ti theMatrix
164 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
166 distanceMap = Map.fromList
168 $ (if strength == Weak then List.reverse else identity)
172 $ (\m -> m `seq` Map.filter (> threshold) m)
173 $ similarities `seq` mat2map similarities
175 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
177 myCooc' = Map.fromList $ HashMap.toList myCooc
178 (ti, _it) = createIndices myCooc'
179 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
180 distanceMap = toIndex ti
183 $ (if strength == Weak then List.reverse else identity)
186 $ HashMap.filter (> threshold)
189 ----------------------------------------------------------
190 -- | From data to Graph
192 type Occurrences = Int
194 multiPartiteWith :: MultiPartite -> NgramsTerm -> TypeNode
195 multiPartiteWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
196 if HashSet.member t s1
200 typeNode :: NgramsType -> TypeNode
201 typeNode NgramsTerms = Terms
205 data2graph :: ToComId a
207 -> Map NgramsTerm Int
208 -> Map (Int, Int) Occurrences
209 -> Map (Int, Int) Double
210 -> Map (Int, Int) Double
213 data2graph multi labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
214 , _graph_edges = edges
215 , _graph_metadata = Nothing
219 nodes = map (setCoord ForceAtlas labels bridge)
220 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
221 , node_type = multiPartiteWith multi l
222 , node_id = cs (show n)
223 , node_label = unNgramsTerm l
226 , node_attributes = Attributes { clust_default = fromMaybe 0
227 (Map.lookup n community_id_by_node_id)
233 , Set.member n toKeep
236 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
238 edges = [ Edge { edge_source = cs (show s)
239 , edge_target = cs (show t)
240 , edge_weight = weight
241 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
242 , edge_id = cs (show i)
244 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
249 community_id_by_node_id = Map.fromList
250 $ map nodeId2comId partitions
252 labels = Map.toList labels'
255 ------------------------------------------------------------------------
257 data Layout = KamadaKawai | ACP | ForceAtlas
260 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
261 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
267 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
268 setCoord l labels m (n,node) = node { node_x_coord = x
272 (x,y) = getCoord l labels m n
278 -> Map (Int, Int) Double
281 getCoord KamadaKawai _ _m _n = undefined -- layout m n
283 getCoord ForceAtlas _ _ n = (sin d, cos d)
287 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
289 $ pcaReduceTo (Dimension 2)
292 to2d :: Vec.Vector Double -> (Double, Double)
295 ds = take 2 $ Vec.toList v
299 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
300 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
304 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
305 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
306 ------------------------------------------------------------------------
308 -- | KamadaKawai Layout
309 -- TODO TEST: check labels, nodeId and coordinates
310 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
311 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
313 coord :: (Map Int (Double,Double))
314 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
315 --p = Layout.defaultLGL
316 p = Layout.kamadaKawai
317 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
319 -----------------------------------------------------------------------------
321 cooc2graph'' :: Ord t => Similarity
324 -> Map (Index, Index) Double
325 cooc2graph'' distance threshold myCooc = neighbourMap
327 (ti, _) = createIndices myCooc
328 myCooc' = toIndex ti myCooc
329 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
330 distanceMat = measure distance matCooc
331 neighbourMap = filterByNeighbours threshold
332 $ mat2map distanceMat
335 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
336 filterByNeighbours threshold distanceMap = filteredMap
339 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
340 filteredMap :: Map (Index, Index) Double
341 filteredMap = Map.fromList
344 let selected = List.reverse
348 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
349 in List.take (round threshold) selected