]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
Merge branch 'dev' into dev-test
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Debug.Trace (trace)
22 import GHC.Float (sin, cos)
23 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
24 import Gargantext.Core.Methods.Distances (Distance(..), measure)
25 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
26 import Gargantext.Core.Statistics
27 import Gargantext.Core.Viz.Graph
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, ClusterNode)
31 import Gargantext.Prelude
32 import IGraph.Random -- (Gen(..))
33 import qualified Data.HashMap.Strict as HashMap
34 import qualified Data.List as List
35 import qualified Data.Map as Map
36 import qualified Data.Set as Set
37 import qualified Data.Vector.Storable as Vec
38 import qualified IGraph as Igraph
39 import qualified IGraph.Algorithms.Layout as Layout
40
41
42 -------------------------------------------------------------
43
44 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
45 defaultClustering = spinglass 1
46
47 -------------------------------------------------------------
48
49 type Threshold = Double
50
51 cooc2graph' :: Ord t => Distance
52 -> Double
53 -> Map (t, t) Int
54 -> Map (Index, Index) Double
55 cooc2graph' distance threshold myCooc
56 = Map.filter (> threshold)
57 $ mat2map
58 $ measure distance
59 $ case distance of
60 Conditional -> map2mat Triangle 0 tiSize
61 Distributional -> map2mat Square 0 tiSize
62 $ Map.filter (> 1) myCooc'
63
64 where
65 (ti, _) = createIndices myCooc
66 tiSize = Map.size ti
67 myCooc' = toIndex ti myCooc
68
69
70 data PartitionMethod = Louvain | Spinglass
71
72 -- | coocurrences graph computation
73 cooc2graphWith :: PartitionMethod
74 -> Distance
75 -> Threshold
76 -> HashMap (NgramsTerm, NgramsTerm) Int
77 -> IO Graph
78 cooc2graphWith Louvain = undefined -- TODO use IGraph bindings
79 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
80
81 cooc2graph'' :: Ord t => Distance
82 -> Double
83 -> Map (t, t) Int
84 -> Map (Index, Index) Double
85 cooc2graph'' distance threshold myCooc = neighbourMap
86 where
87 (ti, _) = createIndices myCooc
88 myCooc' = toIndex ti myCooc
89 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
90 distanceMat = measure distance matCooc
91 neighbourMap = filterByNeighbours threshold
92 $ mat2map distanceMat
93
94
95 -- Quentin
96 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
97 filterByNeighbours threshold distanceMap = filteredMap
98 where
99 indexes :: [Index]
100 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
101 filteredMap :: Map (Index, Index) Double
102 filteredMap = Map.fromList
103 $ List.concat
104 $ map (\idx ->
105 let selected = List.reverse
106 $ List.sortOn snd
107 $ Map.toList
108 $ Map.filter (> 0)
109 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
110 in List.take (round threshold) selected
111 ) indexes
112
113 cooc2graphWith' :: ToComId a
114 => Partitions a
115 -> Distance
116 -> Threshold
117 -> HashMap (NgramsTerm, NgramsTerm) Int
118 -> IO Graph
119 cooc2graphWith' doPartitions distance threshold myCooc = do
120 let
121 -- TODO remove below
122 theMatrix = Map.fromList
123 $ HashMap.toList myCooc
124
125 (ti, _) = createIndices theMatrix
126 tiSize = Map.size ti
127 myCooc' = toIndex ti theMatrix
128 matCooc = case distance of -- Shape of the Matrix
129 Conditional -> map2mat Triangle 0 tiSize
130 Distributional -> map2mat Square 0 tiSize
131 $ case distance of -- Removing the Diagonal ?
132 Conditional -> Map.filterWithKey (\(a,b) _ -> a /= b)
133 Distributional -> identity
134 $ Map.filter (>1) myCooc'
135
136 similarities = measure distance matCooc
137 links = round (let n :: Double = fromIntegral tiSize in n * log n)
138 distanceMap = Map.fromList
139 $ List.take links
140 $ List.sortOn snd
141 $ Map.toList
142 $ case distance of
143 Conditional -> Map.filter (> threshold)
144 Distributional -> Map.filter (> 0)
145 $ mat2map similarities
146
147 nodesApprox :: Int
148 nodesApprox = n'
149 where
150 (as, bs) = List.unzip $ Map.keys distanceMap
151 n' = Set.size $ Set.fromList $ as <> bs
152 ClustersParams rivers _level = clustersParams nodesApprox
153
154 saveAsFileDebug "debug/the-matrix" theMatrix
155 saveAsFileDebug "debug/my-cooc-prime" myCooc'
156 saveAsFileDebug "debug/mat-cooc" matCooc
157 saveAsFileDebug "debug/similarities" similarities
158 saveAsFileDebug "debug/links" links
159 saveAsFileDebug "debug/distanceMap" distanceMap
160 saveAsFileDebug "debug/nodesApprox" nodesApprox
161
162 printDebug "similarities" similarities
163
164 -- partitions <- if (Map.size distanceMap > 0)
165 -- then doPartitions distanceMap
166 -- else panic "Text.Flow: DistanceMap is empty"
167 partitions <- doPartitions distanceMap
168
169 let
170 -- bridgeness' = distanceMap
171 bridgeness' = trace ("Rivers: " <> show rivers)
172 $ bridgeness rivers partitions distanceMap
173
174 confluence' = confluence (Map.keys bridgeness') 3 True False
175
176 pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
177 myCooc' bridgeness' confluence' partitions
178
179
180
181 -- cooc2graph :: Distance
182 -- -> Threshold
183 -- -> (Map (Text, Text) Int)
184 -- -> IO Graph
185 -- cooc2graph distance threshold myCooc = do
186 -- printDebug "cooc2graph" distance
187 -- let
188 -- -- TODO remove below
189 -- theMatrix = Map.fromList $ HashMap.toList myCooc
190 -- (ti, _) = createIndices theMatrix
191 -- myCooc' = toIndex ti theMatrix
192 -- matCooc = map2mat 0 (Map.size ti)
193 -- $ Map.filterWithKey (\(a,b) _ -> a /= b)
194 -- $ Map.filter (> 1) myCooc'
195 -- distanceMat = measure distance matCooc
196 -- distanceMap = Map.filter (> threshold) $ mat2map distanceMat
197
198 -- nodesApprox :: Int
199 -- nodesApprox = n'
200 -- where
201 -- (as, bs) = List.unzip $ Map.keys distanceMap
202 -- n' = Set.size $ Set.fromList $ as <> bs
203 -- ClustersParams rivers _level = clustersParams nodesApprox
204
205 -- printDebug "Start" ("partitions" :: Text)
206 -- partitions <- if (Map.size distanceMap > 0)
207 -- -- then iLouvainMap 100 10 distanceMap
208 -- -- then hLouvain distanceMap
209 -- then doPartitions distanceMap
210 -- else panic "Text.Flow: DistanceMap is empty"
211 -- printDebug "End" ("partitions" :: Text)
212
213 -- let
214 -- -- bridgeness' = distanceMap
215 -- bridgeness' = trace ("Rivers: " <> show rivers)
216 -- $ bridgeness rivers partitions distanceMap
217 -- confluence' = confluence (Map.keys bridgeness') 3 True False
218
219 -- pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
220 -- myCooc' bridgeness' confluence' partitions
221
222 ------------------------------------------------------------------------
223 ------------------------------------------------------------------------
224 data ClustersParams = ClustersParams { bridgness :: Double
225 , louvain :: Text
226 } deriving (Show)
227
228 clustersParams :: Int -> ClustersParams
229 clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
230 {- where
231 y | x < 100 = "0.000001"
232 | x < 350 = "0.000001"
233 | x < 500 = "0.000001"
234 | x < 1000 = "0.000001"
235 | otherwise = "1"
236 -}
237
238 ----------------------------------------------------------
239 -- | From data to Graph
240 data2graph :: ToComId a
241 => [(Text, Int)]
242 -> Map (Int, Int) Int
243 -> Map (Int, Int) Double
244 -> Map (Int, Int) Double
245 -> [a]
246 -> Graph
247 data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
248 where
249
250 community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
251
252 nodes = map (setCoord ForceAtlas labels bridge)
253 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
254 , node_type = Terms -- or Unknown
255 , node_id = cs (show n)
256 , node_label = l
257 , node_x_coord = 0
258 , node_y_coord = 0
259 , node_attributes =
260 Attributes { clust_default = maybe 0 identity
261 (Map.lookup n community_id_by_node_id) } }
262 )
263 | (l, n) <- labels
264 , Set.member n $ Set.fromList
265 $ List.concat
266 $ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
267 $ Map.toList bridge
268 ]
269
270 edges = [ Edge { edge_source = cs (show s)
271 , edge_target = cs (show t)
272 , edge_weight = d
273 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
274 -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
275 , edge_id = cs (show i)
276 }
277 | (i, ((s,t), d)) <- zip ([0..]::[Integer] )
278 (Map.toList bridge)
279 , s /= t, d > 0
280 ]
281
282
283 ------------------------------------------------------------------------
284
285 data Layout = KamadaKawai | ACP | ForceAtlas
286
287
288 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
289 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
290 where
291 (x,y) = f i
292
293
294 -- | ACP
295 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
296 setCoord l labels m (n,node) = node { node_x_coord = x
297 , node_y_coord = y
298 }
299 where
300 (x,y) = getCoord l labels m n
301
302
303 getCoord :: Ord a
304 => Layout
305 -> [(a, Int)]
306 -> Map (Int, Int) Double
307 -> Int
308 -> (Double, Double)
309 getCoord KamadaKawai _ _m _n = undefined -- layout m n
310
311 getCoord ForceAtlas _ _ n = (sin d, cos d)
312 where
313 d = fromIntegral n
314
315 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
316 $ Map.lookup n
317 $ pcaReduceTo (Dimension 2)
318 $ mapArray labels m
319 where
320 to2d :: Vec.Vector Double -> (Double, Double)
321 to2d v = (x',y')
322 where
323 ds = take 2 $ Vec.toList v
324 x' = head' "to2d" ds
325 y' = last' "to2d" ds
326
327 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
328 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
329 where
330 ns = map snd items
331
332 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
333 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
334 ------------------------------------------------------------------------
335
336 -- | KamadaKawai Layout
337 -- TODO TEST: check labels, nodeId and coordinates
338 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
339 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
340 where
341 coord :: (Map Int (Double,Double))
342 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
343 --p = Layout.defaultLGL
344 p = Layout.kamadaKawai
345 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
346 -----------------------------------------------------------------------------
347 -----------------------------------------------------------------------------
348 -----------------------------------------------------------------------------
349 -- Debug
350 {-
351 -- measure logDistributional
352 dataDebug = map2mat Square (0::Int) 19 dataBug'
353
354 dataBug' :: Map (Int, Int) Int
355 dataBug' = Map.fromList [((0,0),28),((0,1),8),((0,2),6),((0,3),2),((0,5),4),((0,6),4),((0,7),2),((0,9),7),((0,10),4),((0,13),4),((0,14),2),((0,15),5),((0,16),8),((0,17),3),((1,1),28),((1,2),6),((1,3),7),((1,4),5),((1,5),7),((1,6),5),((1,7),2),((1,9),6),((1,10),7),((1,11),5),((1,13),6),((1,15),6),((1,16),14),((1,18),4),((2,2),39),((2,3),5),((2,4),4),((2,5),3),((2,6),4),((2,7),4),((2,8),3),((2,9),17),((2,10),4),((2,11),8),((2,12),2),((2,13),15),((2,14),4),((2,15),5),((2,16),21),((2,18),4),((3,3),48),((3,4),10),((3,5),7),((3,6),3),((3,7),7),((3,8),6),((3,9),12),((3,10),9),((3,11),8),((3,12),5),((3,13),15),((3,14),5),((3,15),9),((3,16),17),((3,18),4),((4,4),33),((4,5),2),((4,6),5),((4,7),7),((4,8),4),((4,9),6),((4,10),12),((4,11),8),((4,12),3),((4,13),16),((4,14),4),((4,15),4),((4,16),5),((4,17),2),((4,18),12),((5,5),27),((5,6),2),((5,8),3),((5,9),12),((5,10),6),((5,11),9),((5,13),4),((5,14),2),((5,15),7),((5,16),11),((5,18),4),((6,6),34),((6,7),4),((6,8),3),((6,9),12),((6,10),8),((6,11),2),((6,12),5),((6,13),6),((6,14),6),((6,15),5),((6,16),22),((6,17),8),((6,18),4),((7,7),27),((7,8),2),((7,9),6),((7,10),2),((7,11),4),((7,13),13),((7,15),2),((7,16),8),((7,17),6),((7,18),4),((8,8),30),((8,9),9),((8,10),6),((8,11),9),((8,12),6),((8,13),3),((8,14),3),((8,15),4),((8,16),15),((8,17),3),((8,18),5),((9,9),69),((9,10),9),((9,11),22),((9,12),15),((9,13),18),((9,14),10),((9,15),14),((9,16),48),((9,17),6),((9,18),9),((10,10),39),((10,11),15),((10,12),5),((10,13),11),((10,14),2),((10,15),4),((10,16),19),((10,17),3),((10,18),11),((11,11),48),((11,12),9),((11,13),20),((11,14),2),((11,15),13),((11,16),29),((11,18),13),((12,12),30),((12,13),4),((12,15),5),((12,16),16),((12,17),6),((12,18),2),((13,13),65),((13,14),10),((13,15),14),((13,16),23),((13,17),6),((13,18),10),((14,14),25),((14,16),9),((14,17),3),((14,18),3),((15,15),38),((15,16),17),((15,18),4),((16,16),99),((16,17),11),((16,18),14),((17,17),29),((18,18),23)]
356 -}