]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[FEAT] connnecting new clustering algorithm
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Debug.Trace (trace)
22 import GHC.Float (sin, cos)
23 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
24 import Gargantext.Core.Methods.Distances (Distance(..), measure)
25 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
26 import Gargantext.Core.Statistics
27 import Gargantext.Core.Viz.Graph
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
31 import Gargantext.Prelude
32 import Graph.Types (ClusterNode)
33 import qualified Graph.BAC.ProxemyOptim as BAC
34 import IGraph.Random -- (Gen(..))
35 import qualified Data.HashMap.Strict as HashMap
36 import qualified Data.List as List
37 import qualified Data.Map as Map
38 import qualified Data.Set as Set
39 import qualified Data.Vector.Storable as Vec
40 import qualified IGraph as Igraph
41 import qualified IGraph.Algorithms.Layout as Layout
42
43
44 -------------------------------------------------------------
45 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
46 defaultClustering x = pure $ BAC.defaultClustering x
47
48 -------------------------------------------------------------
49 type Threshold = Double
50
51 cooc2graph' :: Ord t => Distance
52 -> Double
53 -> Map (t, t) Int
54 -> Map (Index, Index) Double
55 cooc2graph' distance threshold myCooc
56 = Map.filter (> threshold)
57 $ mat2map
58 $ measure distance
59 $ case distance of
60 Conditional -> map2mat Triangle 0 tiSize
61 Distributional -> map2mat Square 0 tiSize
62 $ Map.filter (> 1) myCooc'
63
64 where
65 (ti, _) = createIndices myCooc
66 tiSize = Map.size ti
67 myCooc' = toIndex ti myCooc
68
69
70 data PartitionMethod = Louvain | Spinglass
71
72 -- | coocurrences graph computation
73 cooc2graphWith :: PartitionMethod
74 -> Distance
75 -> Threshold
76 -> HashMap (NgramsTerm, NgramsTerm) Int
77 -> IO Graph
78 cooc2graphWith Louvain = undefined -- TODO use IGraph bindings
79 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
80
81 cooc2graph'' :: Ord t => Distance
82 -> Double
83 -> Map (t, t) Int
84 -> Map (Index, Index) Double
85 cooc2graph'' distance threshold myCooc = neighbourMap
86 where
87 (ti, _) = createIndices myCooc
88 myCooc' = toIndex ti myCooc
89 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
90 distanceMat = measure distance matCooc
91 neighbourMap = filterByNeighbours threshold
92 $ mat2map distanceMat
93
94
95 -- Quentin
96 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
97 filterByNeighbours threshold distanceMap = filteredMap
98 where
99 indexes :: [Index]
100 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
101 filteredMap :: Map (Index, Index) Double
102 filteredMap = Map.fromList
103 $ List.concat
104 $ map (\idx ->
105 let selected = List.reverse
106 $ List.sortOn snd
107 $ Map.toList
108 $ Map.filter (> 0)
109 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
110 in List.take (round threshold) selected
111 ) indexes
112
113
114 doDistanceMap :: Distance
115 -> Threshold
116 -> HashMap (NgramsTerm, NgramsTerm) Int
117 -> (Map (Int,Int) Double, Map (Index, Index) Int, Map NgramsTerm Index)
118 doDistanceMap distance threshold myCooc = (distanceMap, myCooc', ti)
119 where
120 -- TODO remove below
121 theMatrix = Map.fromList
122 $ HashMap.toList myCooc
123
124 (ti, _) = createIndices theMatrix
125 tiSize = Map.size ti
126 myCooc' = toIndex ti theMatrix
127 matCooc = case distance of -- Shape of the Matrix
128 Conditional -> map2mat Triangle 0 tiSize
129 Distributional -> map2mat Square 0 tiSize
130 $ case distance of -- Removing the Diagonal ?
131 Conditional -> Map.filterWithKey (\(a,b) _ -> a /= b)
132 Distributional -> identity
133 $ Map.filter (>1) myCooc'
134
135 similarities = measure distance matCooc
136 links = round (let n :: Double = fromIntegral tiSize in n * log n)
137
138 distanceMap = Map.fromList $ List.take links
139 $ List.sortOn snd
140 $ Map.toList
141 $ case distance of
142 Conditional -> Map.filter (> threshold)
143 Distributional -> Map.filter (> 0)
144 $ mat2map similarities
145
146 cooc2graphWith' :: ToComId a
147 => Partitions a
148 -> Distance
149 -> Threshold
150 -> HashMap (NgramsTerm, NgramsTerm) Int
151 -> IO Graph
152 cooc2graphWith' doPartitions distance threshold myCooc = do
153 let
154 (distanceMap, myCooc', ti) = doDistanceMap distance threshold myCooc
155
156 nodesApprox :: Int
157 nodesApprox = n'
158 where
159 (as, bs) = List.unzip $ Map.keys distanceMap
160 n' = Set.size $ Set.fromList $ as <> bs
161 ClustersParams rivers _level = clustersParams nodesApprox
162
163 {- -- Debug
164 saveAsFileDebug "debug/distanceMap" distanceMap
165 printDebug "similarities" similarities
166 -}
167
168 partitions <- if (Map.size distanceMap > 0)
169 then doPartitions distanceMap
170 else panic "Text.Flow: DistanceMap is empty"
171
172 let
173 -- bridgeness' = distanceMap
174 bridgeness' = trace ("Rivers: " <> show rivers)
175 $ bridgeness rivers partitions distanceMap
176
177 confluence' = confluence (Map.keys bridgeness') 3 True False
178
179 pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
180 myCooc' bridgeness' confluence' partitions
181
182
183 ------------------------------------------------------------------------
184 ------------------------------------------------------------------------
185 data ClustersParams = ClustersParams { bridgness :: Double
186 , louvain :: Text
187 } deriving (Show)
188
189 clustersParams :: Int -> ClustersParams
190 clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
191 {- where
192 y | x < 100 = "0.000001"
193 | x < 350 = "0.000001"
194 | x < 500 = "0.000001"
195 | x < 1000 = "0.000001"
196 | otherwise = "1"
197 -}
198
199 ----------------------------------------------------------
200 -- | From data to Graph
201 data2graph :: ToComId a
202 => [(Text, Int)]
203 -> Map (Int, Int) Int
204 -> Map (Int, Int) Double
205 -> Map (Int, Int) Double
206 -> [a]
207 -> Graph
208 data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
209 where
210
211 community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
212
213 nodes = map (setCoord ForceAtlas labels bridge)
214 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
215 , node_type = Terms -- or Unknown
216 , node_id = cs (show n)
217 , node_label = l
218 , node_x_coord = 0
219 , node_y_coord = 0
220 , node_attributes =
221 Attributes { clust_default = maybe 0 identity
222 (Map.lookup n community_id_by_node_id) } }
223 )
224 | (l, n) <- labels
225 , Set.member n $ Set.fromList
226 $ List.concat
227 $ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
228 $ Map.toList bridge
229 ]
230
231 edges = [ Edge { edge_source = cs (show s)
232 , edge_target = cs (show t)
233 , edge_weight = d
234 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
235 -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
236 , edge_id = cs (show i)
237 }
238 | (i, ((s,t), d)) <- zip ([0..]::[Integer] )
239 (Map.toList bridge)
240 , s /= t, d > 0
241 ]
242
243
244 ------------------------------------------------------------------------
245
246 data Layout = KamadaKawai | ACP | ForceAtlas
247
248
249 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
250 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
251 where
252 (x,y) = f i
253
254
255 -- | ACP
256 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
257 setCoord l labels m (n,node) = node { node_x_coord = x
258 , node_y_coord = y
259 }
260 where
261 (x,y) = getCoord l labels m n
262
263
264 getCoord :: Ord a
265 => Layout
266 -> [(a, Int)]
267 -> Map (Int, Int) Double
268 -> Int
269 -> (Double, Double)
270 getCoord KamadaKawai _ _m _n = undefined -- layout m n
271
272 getCoord ForceAtlas _ _ n = (sin d, cos d)
273 where
274 d = fromIntegral n
275
276 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
277 $ Map.lookup n
278 $ pcaReduceTo (Dimension 2)
279 $ mapArray labels m
280 where
281 to2d :: Vec.Vector Double -> (Double, Double)
282 to2d v = (x',y')
283 where
284 ds = take 2 $ Vec.toList v
285 x' = head' "to2d" ds
286 y' = last' "to2d" ds
287
288 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
289 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
290 where
291 ns = map snd items
292
293 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
294 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
295 ------------------------------------------------------------------------
296
297 -- | KamadaKawai Layout
298 -- TODO TEST: check labels, nodeId and coordinates
299 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
300 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
301 where
302 coord :: (Map Int (Double,Double))
303 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
304 --p = Layout.defaultLGL
305 p = Layout.kamadaKawai
306 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
307 -----------------------------------------------------------------------------
308 -----------------------------------------------------------------------------
309 -----------------------------------------------------------------------------
310 -- Debug
311 {-
312 -- measure logDistributional
313 dataDebug = map2mat Square (0::Int) 19 dataBug'
314
315 dataBug' :: Map (Int, Int) Int
316 dataBug' = Map.fromList [((0,0),28),((0,1),8),((0,2),6),((0,3),2),((0,5),4),((0,6),4),((0,7),2),((0,9),7),((0,10),4),((0,13),4),((0,14),2),((0,15),5),((0,16),8),((0,17),3),((1,1),28),((1,2),6),((1,3),7),((1,4),5),((1,5),7),((1,6),5),((1,7),2),((1,9),6),((1,10),7),((1,11),5),((1,13),6),((1,15),6),((1,16),14),((1,18),4),((2,2),39),((2,3),5),((2,4),4),((2,5),3),((2,6),4),((2,7),4),((2,8),3),((2,9),17),((2,10),4),((2,11),8),((2,12),2),((2,13),15),((2,14),4),((2,15),5),((2,16),21),((2,18),4),((3,3),48),((3,4),10),((3,5),7),((3,6),3),((3,7),7),((3,8),6),((3,9),12),((3,10),9),((3,11),8),((3,12),5),((3,13),15),((3,14),5),((3,15),9),((3,16),17),((3,18),4),((4,4),33),((4,5),2),((4,6),5),((4,7),7),((4,8),4),((4,9),6),((4,10),12),((4,11),8),((4,12),3),((4,13),16),((4,14),4),((4,15),4),((4,16),5),((4,17),2),((4,18),12),((5,5),27),((5,6),2),((5,8),3),((5,9),12),((5,10),6),((5,11),9),((5,13),4),((5,14),2),((5,15),7),((5,16),11),((5,18),4),((6,6),34),((6,7),4),((6,8),3),((6,9),12),((6,10),8),((6,11),2),((6,12),5),((6,13),6),((6,14),6),((6,15),5),((6,16),22),((6,17),8),((6,18),4),((7,7),27),((7,8),2),((7,9),6),((7,10),2),((7,11),4),((7,13),13),((7,15),2),((7,16),8),((7,17),6),((7,18),4),((8,8),30),((8,9),9),((8,10),6),((8,11),9),((8,12),6),((8,13),3),((8,14),3),((8,15),4),((8,16),15),((8,17),3),((8,18),5),((9,9),69),((9,10),9),((9,11),22),((9,12),15),((9,13),18),((9,14),10),((9,15),14),((9,16),48),((9,17),6),((9,18),9),((10,10),39),((10,11),15),((10,12),5),((10,13),11),((10,14),2),((10,15),4),((10,16),19),((10,17),3),((10,18),11),((11,11),48),((11,12),9),((11,13),20),((11,14),2),((11,15),13),((11,16),29),((11,18),13),((12,12),30),((12,13),4),((12,15),5),((12,16),16),((12,17),6),((12,18),2),((13,13),65),((13,14),10),((13,15),14),((13,16),23),((13,17),6),((13,18),10),((14,14),25),((14,16),9),((14,17),3),((14,18),3),((15,15),38),((15,16),17),((15,18),4),((16,16),99),((16,17),11),((16,18),14),((17,17),29),((18,18),23)]
317 -}