]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[VERSION] +1 to 0.0.4.8.6
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Debug.Trace (trace)
22 import GHC.Float (sin, cos)
23 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
24 import Gargantext.Core.Methods.Distances (Distance(..), measure)
25 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
26 import Gargantext.Core.Statistics
27 import Gargantext.Core.Viz.Graph
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, ClusterNode)
31 import Gargantext.Prelude
32 import IGraph.Random -- (Gen(..))
33 import qualified Data.HashMap.Strict as HashMap
34 import qualified Data.List as List
35 import qualified Data.Map as Map
36 import qualified Data.Set as Set
37 import qualified Data.Vector.Storable as Vec
38 import qualified IGraph as Igraph
39 import qualified IGraph.Algorithms.Layout as Layout
40
41
42 -------------------------------------------------------------
43
44 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
45 defaultClustering = spinglass 1
46
47 -------------------------------------------------------------
48
49 type Threshold = Double
50
51 cooc2graph' :: Ord t => Distance
52 -> Double
53 -> Map (t, t) Int
54 -> Map (Index, Index) Double
55 cooc2graph' distance threshold myCooc
56 = Map.filter (> threshold)
57 $ mat2map
58 $ measure distance
59 $ case distance of
60 Conditional -> map2mat Triangle 0 tiSize
61 Distributional -> map2mat Square 0 tiSize
62 $ Map.filter (> 1) myCooc'
63
64 where
65 (ti, _) = createIndices myCooc
66 tiSize = Map.size ti
67 myCooc' = toIndex ti myCooc
68
69
70 data PartitionMethod = Louvain | Spinglass | Bac
71
72 -- | coocurrences graph computation
73 cooc2graphWith :: PartitionMethod
74 -> Distance
75 -> Threshold
76 -> HashMap (NgramsTerm, NgramsTerm) Int
77 -> IO Graph
78 cooc2graphWith Louvain = undefined -- TODO use IGraph bindings
79 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
80 cooc2graphWith Bac = undefined -- cooc2graphWith' BAC.defaultClustering
81
82 cooc2graph'' :: Ord t => Distance
83 -> Double
84 -> Map (t, t) Int
85 -> Map (Index, Index) Double
86 cooc2graph'' distance threshold myCooc = neighbourMap
87 where
88 (ti, _) = createIndices myCooc
89 myCooc' = toIndex ti myCooc
90 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
91 distanceMat = measure distance matCooc
92 neighbourMap = filterByNeighbours threshold
93 $ mat2map distanceMat
94
95
96 -- Quentin
97 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
98 filterByNeighbours threshold distanceMap = filteredMap
99 where
100 indexes :: [Index]
101 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
102 filteredMap :: Map (Index, Index) Double
103 filteredMap = Map.fromList
104 $ List.concat
105 $ map (\idx ->
106 let selected = List.reverse
107 $ List.sortOn snd
108 $ Map.toList
109 $ Map.filter (> 0)
110 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
111 in List.take (round threshold) selected
112 ) indexes
113
114
115 doDistanceMap :: Distance
116 -> Threshold
117 -> HashMap (NgramsTerm, NgramsTerm) Int
118 -> (Map (Int,Int) Double, Map (Index, Index) Int, Map NgramsTerm Index)
119 doDistanceMap distance threshold myCooc = (distanceMap, myCooc', ti)
120 where
121 -- TODO remove below
122 theMatrix = Map.fromList
123 $ HashMap.toList myCooc
124
125 (ti, _) = createIndices theMatrix
126 tiSize = Map.size ti
127 myCooc' = toIndex ti theMatrix
128 matCooc = case distance of -- Shape of the Matrix
129 Conditional -> map2mat Triangle 0 tiSize
130 Distributional -> map2mat Square 0 tiSize
131 $ case distance of -- Removing the Diagonal ?
132 Conditional -> Map.filterWithKey (\(a,b) _ -> a /= b)
133 Distributional -> identity
134 $ Map.filter (>1) myCooc'
135
136 similarities = measure distance matCooc
137 links = round (let n :: Double = fromIntegral tiSize in n * log n)
138
139 distanceMap = Map.fromList $ List.take links
140 $ List.sortOn snd
141 $ Map.toList
142 $ case distance of
143 Conditional -> Map.filter (> threshold)
144 Distributional -> Map.filter (> 0)
145 $ mat2map similarities
146
147 cooc2graphWith' :: ToComId a
148 => Partitions a
149 -> Distance
150 -> Threshold
151 -> HashMap (NgramsTerm, NgramsTerm) Int
152 -> IO Graph
153 cooc2graphWith' doPartitions distance threshold myCooc = do
154 let
155 (distanceMap, myCooc', ti) = doDistanceMap distance threshold myCooc
156
157 nodesApprox :: Int
158 nodesApprox = n'
159 where
160 (as, bs) = List.unzip $ Map.keys distanceMap
161 n' = Set.size $ Set.fromList $ as <> bs
162 ClustersParams rivers _level = clustersParams nodesApprox
163
164 {- -- Debug
165 saveAsFileDebug "debug/distanceMap" distanceMap
166 printDebug "similarities" similarities
167 -}
168
169 partitions <- if (Map.size distanceMap > 0)
170 then doPartitions distanceMap
171 else panic "Text.Flow: DistanceMap is empty"
172
173 let
174 -- bridgeness' = distanceMap
175 bridgeness' = trace ("Rivers: " <> show rivers)
176 $ bridgeness rivers partitions distanceMap
177
178 confluence' = confluence (Map.keys bridgeness') 3 True False
179
180 pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
181 myCooc' bridgeness' confluence' partitions
182
183 ------------------------------------------------------------------------
184 ------------------------------------------------------------------------
185 data ClustersParams = ClustersParams { bridgness :: Double
186 , louvain :: Text
187 } deriving (Show)
188
189 clustersParams :: Int -> ClustersParams
190 clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
191 {- where
192 y | x < 100 = "0.000001"
193 | x < 350 = "0.000001"
194 | x < 500 = "0.000001"
195 | x < 1000 = "0.000001"
196 | otherwise = "1"
197 -}
198
199 ----------------------------------------------------------
200 -- | From data to Graph
201 data2graph :: ToComId a
202 => [(Text, Int)]
203 -> Map (Int, Int) Int
204 -> Map (Int, Int) Double
205 -> Map (Int, Int) Double
206 -> [a]
207 -> Graph
208 data2graph labels coocs bridge conf partitions = Graph { _graph_nodes = nodes
209 , _graph_edges = edges
210 , _graph_metadata = Nothing }
211 where
212
213 community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
214
215 nodes = map (setCoord ForceAtlas labels bridge)
216 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
217 , node_type = Terms -- or Unknown
218 , node_id = cs (show n)
219 , node_label = l
220 , node_x_coord = 0
221 , node_y_coord = 0
222 , node_attributes =
223 Attributes { clust_default = maybe 0 identity
224 (Map.lookup n community_id_by_node_id) }
225 , node_children = [] }
226 )
227 | (l, n) <- labels
228 , Set.member n $ Set.fromList
229 $ List.concat
230 $ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
231 $ Map.toList bridge
232 ]
233
234 edges = [ Edge { edge_source = cs (show s)
235 , edge_target = cs (show t)
236 , edge_weight = d
237 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
238 -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
239 , edge_id = cs (show i)
240 }
241 | (i, ((s,t), d)) <- zip ([0..]::[Integer] )
242 (Map.toList bridge)
243 , s /= t, d > 0
244 ]
245
246
247 ------------------------------------------------------------------------
248
249 data Layout = KamadaKawai | ACP | ForceAtlas
250
251
252 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
253 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
254 where
255 (x,y) = f i
256
257
258 -- | ACP
259 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
260 setCoord l labels m (n,node) = node { node_x_coord = x
261 , node_y_coord = y
262 }
263 where
264 (x,y) = getCoord l labels m n
265
266
267 getCoord :: Ord a
268 => Layout
269 -> [(a, Int)]
270 -> Map (Int, Int) Double
271 -> Int
272 -> (Double, Double)
273 getCoord KamadaKawai _ _m _n = undefined -- layout m n
274
275 getCoord ForceAtlas _ _ n = (sin d, cos d)
276 where
277 d = fromIntegral n
278
279 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
280 $ Map.lookup n
281 $ pcaReduceTo (Dimension 2)
282 $ mapArray labels m
283 where
284 to2d :: Vec.Vector Double -> (Double, Double)
285 to2d v = (x',y')
286 where
287 ds = take 2 $ Vec.toList v
288 x' = head' "to2d" ds
289 y' = last' "to2d" ds
290
291 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
292 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
293 where
294 ns = map snd items
295
296 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
297 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
298 ------------------------------------------------------------------------
299
300 -- | KamadaKawai Layout
301 -- TODO TEST: check labels, nodeId and coordinates
302 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
303 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
304 where
305 coord :: (Map Int (Double,Double))
306 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
307 --p = Layout.defaultLGL
308 p = Layout.kamadaKawai
309 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
310 -----------------------------------------------------------------------------
311 -----------------------------------------------------------------------------
312 -----------------------------------------------------------------------------
313 -- Debug
314 {-
315 -- measure logDistributional
316 dataDebug = map2mat Square (0::Int) 19 dataBug'
317
318 dataBug' :: Map (Int, Int) Int
319 dataBug' = Map.fromList [((0,0),28),((0,1),8),((0,2),6),((0,3),2),((0,5),4),((0,6),4),((0,7),2),((0,9),7),((0,10),4),((0,13),4),((0,14),2),((0,15),5),((0,16),8),((0,17),3),((1,1),28),((1,2),6),((1,3),7),((1,4),5),((1,5),7),((1,6),5),((1,7),2),((1,9),6),((1,10),7),((1,11),5),((1,13),6),((1,15),6),((1,16),14),((1,18),4),((2,2),39),((2,3),5),((2,4),4),((2,5),3),((2,6),4),((2,7),4),((2,8),3),((2,9),17),((2,10),4),((2,11),8),((2,12),2),((2,13),15),((2,14),4),((2,15),5),((2,16),21),((2,18),4),((3,3),48),((3,4),10),((3,5),7),((3,6),3),((3,7),7),((3,8),6),((3,9),12),((3,10),9),((3,11),8),((3,12),5),((3,13),15),((3,14),5),((3,15),9),((3,16),17),((3,18),4),((4,4),33),((4,5),2),((4,6),5),((4,7),7),((4,8),4),((4,9),6),((4,10),12),((4,11),8),((4,12),3),((4,13),16),((4,14),4),((4,15),4),((4,16),5),((4,17),2),((4,18),12),((5,5),27),((5,6),2),((5,8),3),((5,9),12),((5,10),6),((5,11),9),((5,13),4),((5,14),2),((5,15),7),((5,16),11),((5,18),4),((6,6),34),((6,7),4),((6,8),3),((6,9),12),((6,10),8),((6,11),2),((6,12),5),((6,13),6),((6,14),6),((6,15),5),((6,16),22),((6,17),8),((6,18),4),((7,7),27),((7,8),2),((7,9),6),((7,10),2),((7,11),4),((7,13),13),((7,15),2),((7,16),8),((7,17),6),((7,18),4),((8,8),30),((8,9),9),((8,10),6),((8,11),9),((8,12),6),((8,13),3),((8,14),3),((8,15),4),((8,16),15),((8,17),3),((8,18),5),((9,9),69),((9,10),9),((9,11),22),((9,12),15),((9,13),18),((9,14),10),((9,15),14),((9,16),48),((9,17),6),((9,18),9),((10,10),39),((10,11),15),((10,12),5),((10,13),11),((10,14),2),((10,15),4),((10,16),19),((10,17),3),((10,18),11),((11,11),48),((11,12),9),((11,13),20),((11,14),2),((11,15),13),((11,16),29),((11,18),13),((12,12),30),((12,13),4),((12,15),5),((12,16),16),((12,17),6),((12,18),2),((13,13),65),((13,14),10),((13,15),14),((13,16),23),((13,17),6),((13,18),10),((14,14),25),((14,16),9),((14,17),3),((14,18),3),((15,15),38),((15,16),17),((15,18),4),((16,16),99),((16,17),11),((16,18),14),((17,17),29),((18,18),23)]
320 -}