]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
fix
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.HashMap.Strict (HashMap)
18 import Data.Map (Map)
19 import Data.Maybe (fromMaybe)
20 import GHC.Float (sin, cos)
21 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
22 import Gargantext.Core.Methods.Distances (Distance(..), measure)
23 import Gargantext.Core.Methods.Distances.Conditional (conditional)
24 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
25 import Gargantext.Core.Statistics
26 import Gargantext.Core.Viz.Graph
27 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
28 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
29 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
30 import Gargantext.Core.Viz.Graph.Types (ClusterNode)
31 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
32 import Gargantext.Prelude
33 import IGraph.Random -- (Gen(..))
34 import qualified Data.HashMap.Strict as HashMap
35 import qualified Data.List as List
36 import qualified Data.Map as Map
37 import qualified Data.Set as Set
38 import qualified Data.Vector.Storable as Vec
39 import qualified IGraph as Igraph
40 import qualified IGraph.Algorithms.Layout as Layout
41
42
43 -------------------------------------------------------------
44 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
45 -- defaultClustering x = pure $ BAC.defaultClustering x
46 defaultClustering x = spinglass 1 x
47
48 -------------------------------------------------------------
49 type Threshold = Double
50
51
52 cooc2graph' :: Ord t => Distance
53 -> Double
54 -> Map (t, t) Int
55 -> Map (Index, Index) Double
56 cooc2graph' distance threshold myCooc
57 = Map.filter (> threshold)
58 $ mat2map
59 $ measure distance
60 $ case distance of
61 Conditional -> map2mat Triangle 0 tiSize
62 Distributional -> map2mat Square 0 tiSize
63 $ Map.filter (> 1) myCooc'
64
65 where
66 (ti, _) = createIndices myCooc
67 tiSize = Map.size ti
68 myCooc' = toIndex ti myCooc
69
70
71 data PartitionMethod = Louvain | Spinglass
72 -- TODO Bac
73
74 -- coocurrences graph computation
75 cooc2graphWith :: PartitionMethod
76 -> Distance
77 -> Threshold
78 -> HashMap (NgramsTerm, NgramsTerm) Int
79 -> IO Graph
80 cooc2graphWith Louvain = undefined
81 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
82 -- cooc2graphWith Bac = cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
83
84
85 cooc2graphWith' :: ToComId a
86 => Partitions a
87 -> Distance
88 -> Threshold
89 -> HashMap (NgramsTerm, NgramsTerm) Int
90 -> IO Graph
91 cooc2graphWith' doPartitions distance threshold myCooc = do
92 let
93 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
94
95 {- -- Debug
96 saveAsFileDebug "debug/distanceMap" distanceMap
97 printDebug "similarities" similarities
98 -}
99
100 partitions <- if (Map.size distanceMap > 0)
101 then doPartitions distanceMap
102 else panic "Text.Flow: DistanceMap is empty"
103
104 let
105 nodesApprox :: Int
106 nodesApprox = n'
107 where
108 (as, bs) = List.unzip $ Map.keys distanceMap
109 n' = Set.size $ Set.fromList $ as <> bs
110 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
111 confluence' = confluence (Map.keys bridgeness') 3 True False
112
113 pure $ data2graph ti diag bridgeness' confluence' partitions
114
115
116 doDistanceMap :: Distance
117 -> Threshold
118 -> HashMap (NgramsTerm, NgramsTerm) Int
119 -> ( Map (Int,Int) Double
120 , Map (Index, Index) Int
121 , Map NgramsTerm Index
122 )
123 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
124 where
125 -- TODO remove below
126 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
127 $ Map.fromList
128 $ HashMap.toList myCooc
129
130 (ti, _it) = createIndices theMatrix
131 tiSize = Map.size ti
132
133 {-
134 matCooc = case distance of -- Shape of the Matrix
135 Conditional -> map2mat Triangle 0 tiSize
136 Distributional -> map2mat Square 0 tiSize
137 $ toIndex ti theMatrix
138 similarities = measure distance matCooc
139 -}
140
141 similarities = measure Distributional
142 $ map2mat Square 0 tiSize
143 $ toIndex ti theMatrix
144
145 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
146
147 distanceMap = Map.fromList
148 $ List.take links
149 $ List.reverse
150 $ List.sortOn snd
151 $ Map.toList
152 $ edgesFilter
153 $ Map.filter (> threshold)
154 $ mat2map similarities
155
156 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
157 where
158 myCooc' = Map.fromList $ HashMap.toList myCooc
159 (ti, _it) = createIndices myCooc'
160 tiSize = Map.size ti
161
162 links = round (let n :: Double = fromIntegral tiSize in n * log n)
163
164 distanceMap = toIndex ti
165 $ Map.fromList
166 $ List.take links
167 $ List.sortOn snd
168 $ HashMap.toList
169 $ HashMap.filter (> threshold)
170 $ conditional myCooc
171
172 ----------------------------------------------------------
173 -- | From data to Graph
174
175 type Occurrences = Int
176
177 data2graph :: ToComId a
178 => Map NgramsTerm Int
179 -> Map (Int, Int) Occurrences
180 -> Map (Int, Int) Double
181 -> Map (Int, Int) Double
182 -> [a]
183 -> Graph
184 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
185 , _graph_edges = edges
186 , _graph_metadata = Nothing
187 }
188 where
189
190 nodes = map (setCoord ForceAtlas labels bridge)
191 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
192 , node_type = Terms -- or Unknown
193 , node_id = cs (show n)
194 , node_label = unNgramsTerm l
195 , node_x_coord = 0
196 , node_y_coord = 0
197 , node_attributes = Attributes { clust_default = fromMaybe 0
198 (Map.lookup n community_id_by_node_id)
199 }
200 , node_children = [] }
201 )
202 | (l, n) <- labels
203 , Set.member n nodesWithScores
204 ]
205
206 edges = [ Edge { edge_source = cs (show s)
207 , edge_target = cs (show t)
208 , edge_weight = weight
209 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
210 , edge_id = cs (show i)
211 }
212 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
213 , s /= t
214 , weight > 0
215 ]
216
217 community_id_by_node_id = Map.fromList
218 $ map nodeId2comId partitions
219
220 labels = Map.toList labels'
221
222 nodesWithScores = Set.fromList
223 $ List.concat
224 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
225 $ Map.toList bridge
226
227
228 ------------------------------------------------------------------------
229
230 data Layout = KamadaKawai | ACP | ForceAtlas
231
232
233 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
234 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
235 where
236 (x,y) = f i
237
238
239 -- | ACP
240 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
241 setCoord l labels m (n,node) = node { node_x_coord = x
242 , node_y_coord = y
243 }
244 where
245 (x,y) = getCoord l labels m n
246
247
248 getCoord :: Ord a
249 => Layout
250 -> [(a, Int)]
251 -> Map (Int, Int) Double
252 -> Int
253 -> (Double, Double)
254 getCoord KamadaKawai _ _m _n = undefined -- layout m n
255
256 getCoord ForceAtlas _ _ n = (sin d, cos d)
257 where
258 d = fromIntegral n
259
260 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
261 $ Map.lookup n
262 $ pcaReduceTo (Dimension 2)
263 $ mapArray labels m
264 where
265 to2d :: Vec.Vector Double -> (Double, Double)
266 to2d v = (x',y')
267 where
268 ds = take 2 $ Vec.toList v
269 x' = head' "to2d" ds
270 y' = last' "to2d" ds
271
272 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
273 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
274 where
275 ns = map snd items
276
277 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
278 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
279 ------------------------------------------------------------------------
280
281 -- | KamadaKawai Layout
282 -- TODO TEST: check labels, nodeId and coordinates
283 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
284 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
285 where
286 coord :: (Map Int (Double,Double))
287 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
288 --p = Layout.defaultLGL
289 p = Layout.kamadaKawai
290 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
291
292 -----------------------------------------------------------------------------
293 -- MISC Tools
294 cooc2graph'' :: Ord t => Distance
295 -> Double
296 -> Map (t, t) Int
297 -> Map (Index, Index) Double
298 cooc2graph'' distance threshold myCooc = neighbourMap
299 where
300 (ti, _) = createIndices myCooc
301 myCooc' = toIndex ti myCooc
302 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
303 distanceMat = measure distance matCooc
304 neighbourMap = filterByNeighbours threshold
305 $ mat2map distanceMat
306
307 -- Quentin
308 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
309 filterByNeighbours threshold distanceMap = filteredMap
310 where
311 indexes :: [Index]
312 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
313 filteredMap :: Map (Index, Index) Double
314 filteredMap = Map.fromList
315 $ List.concat
316 $ map (\idx ->
317 let selected = List.reverse
318 $ List.sortOn snd
319 $ Map.toList
320 $ Map.filter (> 0)
321 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
322 in List.take (round threshold) selected
323 ) indexes
324
325
326
327
328