]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[FIX] map list size filtered (others terms do not appear any more)
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Debug.Trace (trace)
22 import GHC.Float (sin, cos)
23 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
24 import Gargantext.Core.Methods.Distances (Distance(..), measure)
25 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
26 import Gargantext.Core.Statistics
27 import Gargantext.Core.Viz.Graph
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, ClusterNode)
31 import Gargantext.Prelude
32 import IGraph.Random -- (Gen(..))
33 import qualified Data.HashMap.Strict as HashMap
34 import qualified Data.List as List
35 import qualified Data.Map as Map
36 import qualified Data.Set as Set
37 import qualified Data.Vector.Storable as Vec
38 import qualified IGraph as Igraph
39 import qualified IGraph.Algorithms.Layout as Layout
40
41
42 -------------------------------------------------------------
43
44 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
45 defaultClustering = spinglass 1
46
47 -------------------------------------------------------------
48
49 type Threshold = Double
50
51 cooc2graph' :: Ord t => Distance
52 -> Double
53 -> Map (t, t) Int
54 -> Map (Index, Index) Double
55 cooc2graph' distance threshold myCooc
56 = Map.filter (> threshold)
57 $ mat2map
58 $ measure distance
59 $ case distance of
60 Conditional -> map2mat Triangle 0 tiSize
61 Distributional -> map2mat Square 0 tiSize
62 $ Map.filter (> 1) myCooc'
63
64 where
65 (ti, _) = createIndices myCooc
66 tiSize = Map.size ti
67 myCooc' = toIndex ti myCooc
68
69
70 data PartitionMethod = Louvain | Spinglass
71
72 cooc2graphWith :: PartitionMethod
73 -> Distance
74 -> Threshold
75 -> HashMap (NgramsTerm, NgramsTerm) Int
76 -> IO Graph
77 cooc2graphWith Louvain = undefined -- TODO use IGraph bindings
78 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
79
80 cooc2graph'' :: Ord t => Distance
81 -> Double
82 -> Map (t, t) Int
83 -> Map (Index, Index) Double
84 cooc2graph'' distance threshold myCooc = neighbouMap
85 where
86 (ti, _) = createIndices myCooc
87 myCooc' = toIndex ti myCooc
88 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
89 distanceMat = measure distance matCooc
90 neighbouMap = filterByNeighbours threshold
91 $ mat2map distanceMat
92
93
94 -- Quentin
95 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
96 filterByNeighbours threshold distanceMap = filteredMap
97 where
98 indexes :: [Index]
99 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
100 filteredMap :: Map (Index, Index) Double
101 filteredMap = Map.fromList
102 $ List.concat
103 $ map (\idx ->
104 let selected = List.reverse
105 $ List.sortOn snd
106 $ Map.toList
107 $ Map.filter (> 0)
108 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
109 in List.take (round threshold) selected
110 ) indexes
111
112 cooc2graphWith' :: ToComId a
113 => Partitions a
114 -> Distance
115 -> Threshold
116 -> HashMap (NgramsTerm, NgramsTerm) Int
117 -> IO Graph
118 cooc2graphWith' doPartitions distance threshold myCooc = do
119 let
120 -- TODO remove below
121 theMatrix = Map.fromList
122 $ HashMap.toList myCooc
123
124 (ti, _) = createIndices theMatrix
125 tiSize = Map.size ti
126 myCooc' = toIndex ti theMatrix
127 matCooc = case distance of -- Shape of the Matrix
128 Conditional -> map2mat Triangle 0 tiSize
129 Distributional -> map2mat Square 0 tiSize
130 $ case distance of -- Removing the Diagonal ?
131 Conditional -> Map.filterWithKey (\(a,b) _ -> a /= b)
132 Distributional -> identity
133 $ Map.filter (>1) myCooc'
134
135 similarities = measure distance matCooc
136 links = round (let n :: Double = fromIntegral tiSize in n * log n)
137 distanceMap = Map.fromList
138 $ List.take links
139 $ List.sortOn snd
140 $ Map.toList
141 $ case distance of
142 Conditional -> Map.filter (> threshold)
143 Distributional -> Map.filter (> 0)
144 $ mat2map similarities
145
146 nodesApprox :: Int
147 nodesApprox = n'
148 where
149 (as, bs) = List.unzip $ Map.keys distanceMap
150 n' = Set.size $ Set.fromList $ as <> bs
151 ClustersParams rivers _level = clustersParams nodesApprox
152
153 printDebug "similarities" similarities
154
155 partitions <- if (Map.size distanceMap > 0)
156 then doPartitions distanceMap
157 else panic "Text.Flow: DistanceMap is empty"
158
159 let
160 -- bridgeness' = distanceMap
161 bridgeness' = trace ("Rivers: " <> show rivers)
162 $ bridgeness rivers partitions distanceMap
163
164 confluence' = confluence (Map.keys bridgeness') 3 True False
165
166 pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
167 myCooc' bridgeness' confluence' partitions
168
169
170
171 -- cooc2graph :: Distance
172 -- -> Threshold
173 -- -> (Map (Text, Text) Int)
174 -- -> IO Graph
175 -- cooc2graph distance threshold myCooc = do
176 -- printDebug "cooc2graph" distance
177 -- let
178 -- -- TODO remove below
179 -- theMatrix = Map.fromList $ HashMap.toList myCooc
180 -- (ti, _) = createIndices theMatrix
181 -- myCooc' = toIndex ti theMatrix
182 -- matCooc = map2mat 0 (Map.size ti)
183 -- $ Map.filterWithKey (\(a,b) _ -> a /= b)
184 -- $ Map.filter (> 1) myCooc'
185 -- distanceMat = measure distance matCooc
186 -- distanceMap = Map.filter (> threshold) $ mat2map distanceMat
187
188 -- nodesApprox :: Int
189 -- nodesApprox = n'
190 -- where
191 -- (as, bs) = List.unzip $ Map.keys distanceMap
192 -- n' = Set.size $ Set.fromList $ as <> bs
193 -- ClustersParams rivers _level = clustersParams nodesApprox
194
195 -- printDebug "Start" ("partitions" :: Text)
196 -- partitions <- if (Map.size distanceMap > 0)
197 -- -- then iLouvainMap 100 10 distanceMap
198 -- -- then hLouvain distanceMap
199 -- then doPartitions distanceMap
200 -- else panic "Text.Flow: DistanceMap is empty"
201 -- printDebug "End" ("partitions" :: Text)
202
203 -- let
204 -- -- bridgeness' = distanceMap
205 -- bridgeness' = trace ("Rivers: " <> show rivers)
206 -- $ bridgeness rivers partitions distanceMap
207 -- confluence' = confluence (Map.keys bridgeness') 3 True False
208
209 -- pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
210 -- myCooc' bridgeness' confluence' partitions
211
212 ------------------------------------------------------------------------
213 ------------------------------------------------------------------------
214 data ClustersParams = ClustersParams { bridgness :: Double
215 , louvain :: Text
216 } deriving (Show)
217
218 clustersParams :: Int -> ClustersParams
219 clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
220 {- where
221 y | x < 100 = "0.000001"
222 | x < 350 = "0.000001"
223 | x < 500 = "0.000001"
224 | x < 1000 = "0.000001"
225 | otherwise = "1"
226 -}
227
228 ----------------------------------------------------------
229 -- | From data to Graph
230 data2graph :: ToComId a
231 => [(Text, Int)]
232 -> Map (Int, Int) Int
233 -> Map (Int, Int) Double
234 -> Map (Int, Int) Double
235 -> [a]
236 -> Graph
237 data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
238 where
239
240 community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
241
242 nodes = map (setCoord ForceAtlas labels bridge)
243 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
244 , node_type = Terms -- or Unknown
245 , node_id = cs (show n)
246 , node_label = l
247 , node_x_coord = 0
248 , node_y_coord = 0
249 , node_attributes =
250 Attributes { clust_default = maybe 0 identity
251 (Map.lookup n community_id_by_node_id) } }
252 )
253 | (l, n) <- labels
254 , Set.member n $ Set.fromList
255 $ List.concat
256 $ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
257 $ Map.toList bridge
258 ]
259
260 edges = [ Edge { edge_source = cs (show s)
261 , edge_target = cs (show t)
262 , edge_weight = d
263 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
264 -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
265 , edge_id = cs (show i)
266 }
267 | (i, ((s,t), d)) <- zip ([0..]::[Integer] )
268 (Map.toList bridge)
269 , s /= t, d > 0
270 ]
271
272
273 ------------------------------------------------------------------------
274
275 data Layout = KamadaKawai | ACP | ForceAtlas
276
277
278 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
279 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
280 where
281 (x,y) = f i
282
283
284 -- | ACP
285 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
286 setCoord l labels m (n,node) = node { node_x_coord = x
287 , node_y_coord = y
288 }
289 where
290 (x,y) = getCoord l labels m n
291
292
293 getCoord :: Ord a
294 => Layout
295 -> [(a, Int)]
296 -> Map (Int, Int) Double
297 -> Int
298 -> (Double, Double)
299 getCoord KamadaKawai _ _m _n = undefined -- layout m n
300
301 getCoord ForceAtlas _ _ n = (sin d, cos d)
302 where
303 d = fromIntegral n
304
305 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
306 $ Map.lookup n
307 $ pcaReduceTo (Dimension 2)
308 $ mapArray labels m
309 where
310 to2d :: Vec.Vector Double -> (Double, Double)
311 to2d v = (x',y')
312 where
313 ds = take 2 $ Vec.toList v
314 x' = head' "to2d" ds
315 y' = last' "to2d" ds
316
317 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
318 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
319 where
320 ns = map snd items
321
322 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
323 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
324 ------------------------------------------------------------------------
325
326 -- | KamadaKawai Layout
327 -- TODO TEST: check labels, nodeId and coordinates
328 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
329 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
330 where
331 coord :: (Map Int (Double,Double))
332 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
333 --p = Layout.defaultLGL
334 p = Layout.kamadaKawai
335 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
336 -----------------------------------------------------------------------------
337 -----------------------------------------------------------------------------
338 -----------------------------------------------------------------------------
339 -- Debug
340 {-
341 -- measure logDistributional
342 dataDebug = map2mat Square (0::Int) 19 dataBug'
343
344 dataBug' :: Map (Int, Int) Int
345 dataBug' = Map.fromList [((0,0),28),((0,1),8),((0,2),6),((0,3),2),((0,5),4),((0,6),4),((0,7),2),((0,9),7),((0,10),4),((0,13),4),((0,14),2),((0,15),5),((0,16),8),((0,17),3),((1,1),28),((1,2),6),((1,3),7),((1,4),5),((1,5),7),((1,6),5),((1,7),2),((1,9),6),((1,10),7),((1,11),5),((1,13),6),((1,15),6),((1,16),14),((1,18),4),((2,2),39),((2,3),5),((2,4),4),((2,5),3),((2,6),4),((2,7),4),((2,8),3),((2,9),17),((2,10),4),((2,11),8),((2,12),2),((2,13),15),((2,14),4),((2,15),5),((2,16),21),((2,18),4),((3,3),48),((3,4),10),((3,5),7),((3,6),3),((3,7),7),((3,8),6),((3,9),12),((3,10),9),((3,11),8),((3,12),5),((3,13),15),((3,14),5),((3,15),9),((3,16),17),((3,18),4),((4,4),33),((4,5),2),((4,6),5),((4,7),7),((4,8),4),((4,9),6),((4,10),12),((4,11),8),((4,12),3),((4,13),16),((4,14),4),((4,15),4),((4,16),5),((4,17),2),((4,18),12),((5,5),27),((5,6),2),((5,8),3),((5,9),12),((5,10),6),((5,11),9),((5,13),4),((5,14),2),((5,15),7),((5,16),11),((5,18),4),((6,6),34),((6,7),4),((6,8),3),((6,9),12),((6,10),8),((6,11),2),((6,12),5),((6,13),6),((6,14),6),((6,15),5),((6,16),22),((6,17),8),((6,18),4),((7,7),27),((7,8),2),((7,9),6),((7,10),2),((7,11),4),((7,13),13),((7,15),2),((7,16),8),((7,17),6),((7,18),4),((8,8),30),((8,9),9),((8,10),6),((8,11),9),((8,12),6),((8,13),3),((8,14),3),((8,15),4),((8,16),15),((8,17),3),((8,18),5),((9,9),69),((9,10),9),((9,11),22),((9,12),15),((9,13),18),((9,14),10),((9,15),14),((9,16),48),((9,17),6),((9,18),9),((10,10),39),((10,11),15),((10,12),5),((10,13),11),((10,14),2),((10,15),4),((10,16),19),((10,17),3),((10,18),11),((11,11),48),((11,12),9),((11,13),20),((11,14),2),((11,15),13),((11,16),29),((11,18),13),((12,12),30),((12,13),4),((12,15),5),((12,16),16),((12,17),6),((12,18),2),((13,13),65),((13,14),10),((13,15),14),((13,16),23),((13,17),6),((13,18),10),((14,14),25),((14,16),9),((14,17),3),((14,18),3),((15,15),38),((15,16),17),((15,18),4),((16,16),99),((16,17),11),((16,18),14),((17,17),29),((18,18),23)]
346 -}