]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
Merge branch 'dev-merge' into dev
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 -- import Debug.Trace (trace)
22 import GHC.Float (sin, cos)
23 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
24 import Gargantext.Core.Methods.Distances.Conditional (conditional)
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Types (ClusterNode)
34 import Gargantext.Prelude
35 -- import qualified Graph.BAC.ProxemyOptim as BAC
36 import IGraph.Random -- (Gen(..))
37 import qualified Data.HashMap.Strict as HashMap
38 import qualified Data.List as List
39 import qualified Data.Map as Map
40 import qualified Data.Set as Set
41 import qualified Data.Vector.Storable as Vec
42 import qualified IGraph as Igraph
43 import qualified IGraph.Algorithms.Layout as Layout
44
45
46 -------------------------------------------------------------
47 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
48 -- defaultClustering x = pure $ BAC.defaultClustering x
49 defaultClustering x = spinglass 1 x
50
51 -------------------------------------------------------------
52 type Threshold = Double
53
54
55 cooc2graph' :: Ord t => Distance
56 -> Double
57 -> Map (t, t) Int
58 -> Map (Index, Index) Double
59 cooc2graph' distance threshold myCooc
60 = Map.filter (> threshold)
61 $ mat2map
62 $ measure distance
63 $ case distance of
64 Conditional -> map2mat Triangle 0 tiSize
65 Distributional -> map2mat Square 0 tiSize
66 $ Map.filter (> 1) myCooc'
67
68 where
69 (ti, _) = createIndices myCooc
70 tiSize = Map.size ti
71 myCooc' = toIndex ti myCooc
72
73
74 data PartitionMethod = Louvain | Spinglass
75 -- TODO Bac
76
77 -- coocurrences graph computation
78 cooc2graphWith :: PartitionMethod
79 -> Distance
80 -> Threshold
81 -> HashMap (NgramsTerm, NgramsTerm) Int
82 -> IO Graph
83 cooc2graphWith Louvain = undefined
84 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
85 -- cooc2graphWith Bac = cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
86
87
88 cooc2graphWith' :: ToComId a
89 => Partitions a
90 -> Distance
91 -> Threshold
92 -> HashMap (NgramsTerm, NgramsTerm) Int
93 -> IO Graph
94 cooc2graphWith' doPartitions distance threshold myCooc = do
95 let
96 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
97
98 nodesApprox :: Int
99 nodesApprox = n'
100 where
101 (as, bs) = List.unzip $ Map.keys distanceMap
102 n' = Set.size $ Set.fromList $ as <> bs
103
104 {- -- Debug
105 saveAsFileDebug "debug/distanceMap" distanceMap
106 printDebug "similarities" similarities
107 -}
108
109 partitions <- if (Map.size distanceMap > 0)
110 then doPartitions distanceMap
111 else panic "Text.Flow: DistanceMap is empty"
112
113 let
114 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
115
116 confluence' = confluence (Map.keys bridgeness') 3 True False
117
118 pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
119 diag bridgeness' confluence' partitions
120
121
122 doDistanceMap :: Distance
123 -> Threshold
124 -> HashMap (NgramsTerm, NgramsTerm) Int
125 -> ( Map (Int,Int) Double
126 , Map (Index, Index) Int
127 , Map NgramsTerm Index
128 )
129 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
130 where
131 -- TODO remove below
132 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
133 $ Map.fromList
134 $ HashMap.toList myCooc
135
136 (ti, _it) = createIndices theMatrix
137 tiSize = Map.size ti
138
139 {-
140 matCooc = case distance of -- Shape of the Matrix
141 Conditional -> map2mat Triangle 0 tiSize
142 Distributional -> map2mat Square 0 tiSize
143 $ toIndex ti theMatrix
144 similarities = measure distance matCooc
145 -}
146
147 similarities = measure Distributional
148 $ map2mat Square 0 tiSize
149 $ toIndex ti theMatrix
150
151 links = round (let n :: Double = fromIntegral tiSize in n * log n)
152
153 distanceMap = Map.fromList
154 $ List.take links
155 $ List.sortOn snd
156 $ Map.toList
157 $ edgesFilter
158 $ Map.filter (> threshold)
159 $ mat2map similarities
160
161 doDistanceMap Conditional _threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
162 where
163 myCooc' = Map.fromList $ HashMap.toList myCooc
164 (ti, _it) = createIndices myCooc'
165 -- tiSize = Map.size ti
166
167 -- links = round (let n :: Double = fromIntegral tiSize in n * log n)
168
169 distanceMap = toIndex ti
170 $ Map.fromList
171 -- List.take links
172 -- List.sortOn snd
173 $ HashMap.toList
174 -- HashMap.filter (> threshold)
175 $ conditional myCooc
176
177
178
179 ----------------------------------------------------------
180 -- | From data to Graph
181
182 type Occurrences = Map (Int, Int) Int
183
184 data2graph :: ToComId a
185 => [(Text, Int)]
186 -> Occurrences
187 -> Map (Int, Int) Double
188 -> Map (Int, Int) Double
189 -> [a]
190 -> Graph
191 data2graph labels occurences bridge conf partitions = Graph { _graph_nodes = nodes
192 , _graph_edges = edges
193 , _graph_metadata = Nothing
194 }
195 where
196
197 community_id_by_node_id = Map.fromList
198 $ map nodeId2comId partitions
199
200 nodes = map (setCoord ForceAtlas labels bridge)
201 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
202 , node_type = Terms -- or Unknown
203 , node_id = cs (show n)
204 , node_label = l
205 , node_x_coord = 0
206 , node_y_coord = 0
207 , node_attributes =
208 Attributes { clust_default = maybe 0 identity
209 (Map.lookup n community_id_by_node_id) }
210 , node_children = [] }
211 )
212 | (l, n) <- labels
213 , Set.member n $ Set.fromList
214 $ List.concat
215 $ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
216 $ Map.toList bridge
217 ]
218
219 edges = [ Edge { edge_source = cs (show s)
220 , edge_target = cs (show t)
221 , edge_weight = weight
222 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
223 -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
224 , edge_id = cs (show i)
225 }
226 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] )
227 (Map.toList bridge)
228 , s /= t
229 , weight > 0
230 ]
231
232
233 ------------------------------------------------------------------------
234
235 data Layout = KamadaKawai | ACP | ForceAtlas
236
237
238 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
239 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
240 where
241 (x,y) = f i
242
243
244 -- | ACP
245 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
246 setCoord l labels m (n,node) = node { node_x_coord = x
247 , node_y_coord = y
248 }
249 where
250 (x,y) = getCoord l labels m n
251
252
253 getCoord :: Ord a
254 => Layout
255 -> [(a, Int)]
256 -> Map (Int, Int) Double
257 -> Int
258 -> (Double, Double)
259 getCoord KamadaKawai _ _m _n = undefined -- layout m n
260
261 getCoord ForceAtlas _ _ n = (sin d, cos d)
262 where
263 d = fromIntegral n
264
265 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
266 $ Map.lookup n
267 $ pcaReduceTo (Dimension 2)
268 $ mapArray labels m
269 where
270 to2d :: Vec.Vector Double -> (Double, Double)
271 to2d v = (x',y')
272 where
273 ds = take 2 $ Vec.toList v
274 x' = head' "to2d" ds
275 y' = last' "to2d" ds
276
277 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
278 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
279 where
280 ns = map snd items
281
282 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
283 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
284 ------------------------------------------------------------------------
285
286 -- | KamadaKawai Layout
287 -- TODO TEST: check labels, nodeId and coordinates
288 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
289 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
290 where
291 coord :: (Map Int (Double,Double))
292 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
293 --p = Layout.defaultLGL
294 p = Layout.kamadaKawai
295 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
296
297 -----------------------------------------------------------------------------
298 -- MISC Tools
299 cooc2graph'' :: Ord t => Distance
300 -> Double
301 -> Map (t, t) Int
302 -> Map (Index, Index) Double
303 cooc2graph'' distance threshold myCooc = neighbourMap
304 where
305 (ti, _) = createIndices myCooc
306 myCooc' = toIndex ti myCooc
307 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
308 distanceMat = measure distance matCooc
309 neighbourMap = filterByNeighbours threshold
310 $ mat2map distanceMat
311
312 -- Quentin
313 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
314 filterByNeighbours threshold distanceMap = filteredMap
315 where
316 indexes :: [Index]
317 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
318 filteredMap :: Map (Index, Index) Double
319 filteredMap = Map.fromList
320 $ List.concat
321 $ map (\idx ->
322 let selected = List.reverse
323 $ List.sortOn snd
324 $ Map.toList
325 $ Map.filter (> 0)
326 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
327 in List.take (round threshold) selected
328 ) indexes
329
330
331
332
333