]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
start integrating infomap
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.Text as Text
45 import qualified Data.Vector.Storable as Vec
46 import qualified Graph.BAC.ProxemyOptim as BAC
47 import qualified IGraph as Igraph
48 import qualified IGraph.Algorithms.Layout as Layout
49
50
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
58
59
60 -------------------------------------------------------------
61 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
62 -- defaultClustering x = pure $ BAC.defaultClustering x
63 defaultClustering x = spinglass 1 x
64
65 -------------------------------------------------------------
66 type Threshold = Double
67
68
69 cooc2graph' :: Ord t => Distance
70 -> Double
71 -> Map (t, t) Int
72 -> Map (Index, Index) Double
73 cooc2graph' distance threshold myCooc
74 = Map.filter (> threshold)
75 $ mat2map
76 $ measure distance
77 $ case distance of
78 Conditional -> map2mat Triangle 0 tiSize
79 Distributional -> map2mat Square 0 tiSize
80 $ Map.filter (> 1) myCooc'
81
82 where
83 (ti, _) = createIndices myCooc
84 tiSize = Map.size ti
85 myCooc' = toIndex ti myCooc
86
87
88
89 -- coocurrences graph computation
90 cooc2graphWith :: PartitionMethod
91 -> Distance
92 -> Threshold
93 -> HashMap (NgramsTerm, NgramsTerm) Int
94 -> IO Graph
95 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
96 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
97 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
98 -- TODO: change these options, or make them configurable in UI?
99
100
101 cooc2graphWith' :: ToComId a
102 => Partitions a
103 -> Distance
104 -> Threshold
105 -> HashMap (NgramsTerm, NgramsTerm) Int
106 -> IO Graph
107 cooc2graphWith' doPartitions distance threshold myCooc = do
108 let
109 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
110
111 --{- -- Debug
112 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
113 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
114 -- printDebug "similarities" similarities
115 --}
116
117 partitions <- if (Map.size distanceMap > 0)
118 then doPartitions distanceMap
119 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
120 , "Maybe you should add more Map Terms in your list"
121 , "Tutorial: link todo"
122 ]
123
124 let
125 nodesApprox :: Int
126 nodesApprox = n'
127 where
128 (as, bs) = List.unzip $ Map.keys distanceMap
129 n' = Set.size $ Set.fromList $ as <> bs
130 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
131 confluence' = confluence (Map.keys bridgeness') 3 True False
132
133 pure $ data2graph ti diag bridgeness' confluence' partitions
134
135
136 doDistanceMap :: Distance
137 -> Threshold
138 -> HashMap (NgramsTerm, NgramsTerm) Int
139 -> ( Map (Int,Int) Double
140 , Map (Index, Index) Int
141 , Map NgramsTerm Index
142 )
143 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
144 where
145 -- TODO remove below
146 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
147 $ Map.fromList
148 $ HashMap.toList myCooc
149
150 (ti, _it) = createIndices theMatrix
151 tiSize = Map.size ti
152
153 similarities = measure Distributional
154 $ map2mat Square 0 tiSize
155 $ toIndex ti theMatrix
156
157 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
158
159 distanceMap = Map.fromList
160 $ List.take links
161 $ List.reverse
162 $ List.sortOn snd
163 $ Map.toList
164 $ edgesFilter
165 $ Map.filter (> threshold)
166 $ mat2map similarities
167
168 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
169 where
170 myCooc' = Map.fromList $ HashMap.toList myCooc
171 (ti, _it) = createIndices myCooc'
172
173 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
174
175 distanceMap = toIndex ti
176 $ Map.fromList
177 $ List.take links
178 $ List.sortOn snd
179 $ HashMap.toList
180 $ HashMap.filter (> threshold)
181 $ conditional myCooc
182
183 ----------------------------------------------------------
184 -- | From data to Graph
185
186 type Occurrences = Int
187
188 data2graph :: ToComId a
189 => Map NgramsTerm Int
190 -> Map (Int, Int) Occurrences
191 -> Map (Int, Int) Double
192 -> Map (Int, Int) Double
193 -> [a]
194 -> Graph
195 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
196 , _graph_edges = edges
197 , _graph_metadata = Nothing
198 }
199 where
200
201 nodes = map (setCoord ForceAtlas labels bridge)
202 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
203 , node_type = Terms -- or Unknown
204 , node_id = cs (show n)
205 , node_label = unNgramsTerm l
206 , node_x_coord = 0
207 , node_y_coord = 0
208 , node_attributes = Attributes { clust_default = fromMaybe 0
209 (Map.lookup n community_id_by_node_id)
210 }
211 , node_children = []
212 }
213 )
214 | (l, n) <- labels
215 , Set.member n nodesWithScores
216 ]
217
218 edges = [ Edge { edge_source = cs (show s)
219 , edge_target = cs (show t)
220 , edge_weight = weight
221 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
222 , edge_id = cs (show i)
223 }
224 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
225 , s /= t
226 , weight > 0
227 ]
228
229 community_id_by_node_id = Map.fromList
230 $ map nodeId2comId partitions
231
232 labels = Map.toList labels'
233
234 nodesWithScores = Set.fromList
235 $ List.concat
236 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
237 $ Map.toList bridge
238
239
240 ------------------------------------------------------------------------
241
242 data Layout = KamadaKawai | ACP | ForceAtlas
243
244
245 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
246 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
247 where
248 (x,y) = f i
249
250
251 -- | ACP
252 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
253 setCoord l labels m (n,node) = node { node_x_coord = x
254 , node_y_coord = y
255 }
256 where
257 (x,y) = getCoord l labels m n
258
259
260 getCoord :: Ord a
261 => Layout
262 -> [(a, Int)]
263 -> Map (Int, Int) Double
264 -> Int
265 -> (Double, Double)
266 getCoord KamadaKawai _ _m _n = undefined -- layout m n
267
268 getCoord ForceAtlas _ _ n = (sin d, cos d)
269 where
270 d = fromIntegral n
271
272 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
273 $ Map.lookup n
274 $ pcaReduceTo (Dimension 2)
275 $ mapArray labels m
276 where
277 to2d :: Vec.Vector Double -> (Double, Double)
278 to2d v = (x',y')
279 where
280 ds = take 2 $ Vec.toList v
281 x' = head' "to2d" ds
282 y' = last' "to2d" ds
283
284 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
285 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
286 where
287 ns = map snd items
288
289 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
290 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
291 ------------------------------------------------------------------------
292
293 -- | KamadaKawai Layout
294 -- TODO TEST: check labels, nodeId and coordinates
295 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
296 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
297 where
298 coord :: (Map Int (Double,Double))
299 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
300 --p = Layout.defaultLGL
301 p = Layout.kamadaKawai
302 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
303
304 -----------------------------------------------------------------------------
305 -- MISC Tools
306 cooc2graph'' :: Ord t => Distance
307 -> Double
308 -> Map (t, t) Int
309 -> Map (Index, Index) Double
310 cooc2graph'' distance threshold myCooc = neighbourMap
311 where
312 (ti, _) = createIndices myCooc
313 myCooc' = toIndex ti myCooc
314 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
315 distanceMat = measure distance matCooc
316 neighbourMap = filterByNeighbours threshold
317 $ mat2map distanceMat
318
319 -- Quentin
320 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
321 filterByNeighbours threshold distanceMap = filteredMap
322 where
323 indexes :: [Index]
324 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
325 filteredMap :: Map (Index, Index) Double
326 filteredMap = Map.fromList
327 $ List.concat
328 $ map (\idx ->
329 let selected = List.reverse
330 $ List.sortOn snd
331 $ Map.toList
332 $ Map.filter (> 0)
333 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
334 in List.take (round threshold) selected
335 ) indexes
336
337
338
339
340