]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
use gargantext-graph's confluence implementation in graph construction
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Debug.Trace
18
19 import Data.Aeson
20 import Data.HashMap.Strict (HashMap)
21 import Data.Map (Map)
22 import Data.Maybe (fromMaybe)
23 import Data.Swagger hiding (items)
24 import GHC.Float (sin, cos)
25 import GHC.Generics (Generic)
26 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
27 import Gargantext.Core.Methods.Distances (Distance(..), measure)
28 import Gargantext.Core.Methods.Distances.Conditional (conditional)
29 -- import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
30 import Gargantext.Core.Statistics
31 import Gargantext.Core.Viz.Graph
32 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
33 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
34 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
35 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
36 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
37 import Gargantext.Prelude
38 import Graph.Types (ClusterNode)
39 import IGraph.Random -- (Gen(..))
40 import Test.QuickCheck (elements)
41 import Test.QuickCheck.Arbitrary
42 import qualified Data.HashMap.Strict as HashMap
43 import qualified Data.List as List
44 import qualified Data.Map as Map
45 import qualified Data.Set as Set
46 import qualified Data.Text as Text
47 import qualified Data.Vector.Storable as Vec
48 import qualified Graph.BAC.ProxemyOptim as BAC
49 import qualified IGraph as Igraph
50 import qualified IGraph.Algorithms.Layout as Layout
51
52
53 data PartitionMethod = Spinglass | Confluence | Infomap
54 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
55 instance FromJSON PartitionMethod
56 instance ToJSON PartitionMethod
57 instance ToSchema PartitionMethod
58 instance Arbitrary PartitionMethod where
59 arbitrary = elements [ minBound .. maxBound ]
60
61
62 -------------------------------------------------------------
63 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
64 -- defaultClustering x = pure $ BAC.defaultClustering x
65 defaultClustering x = spinglass 1 x
66
67 -------------------------------------------------------------
68 type Threshold = Double
69
70
71 cooc2graph' :: Ord t => Distance
72 -> Double
73 -> Map (t, t) Int
74 -> Map (Index, Index) Double
75 cooc2graph' distance threshold myCooc
76 = Map.filter (> threshold)
77 $ mat2map
78 $ measure distance
79 $ case distance of
80 Conditional -> map2mat Triangle 0 tiSize
81 Distributional -> map2mat Square 0 tiSize
82 $ Map.filter (> 1) myCooc'
83
84 where
85 (ti, _) = createIndices myCooc
86 tiSize = Map.size ti
87 myCooc' = toIndex ti myCooc
88
89
90
91 -- coocurrences graph computation
92 cooc2graphWith :: PartitionMethod
93 -> Distance
94 -> Threshold
95 -> HashMap (NgramsTerm, NgramsTerm) Int
96 -> IO Graph
97 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
98 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
99 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
100 -- TODO: change these options, or make them configurable in UI?
101
102
103 cooc2graphWith' :: ToComId a
104 => Partitions a
105 -> Distance
106 -> Threshold
107 -> HashMap (NgramsTerm, NgramsTerm) Int
108 -> IO Graph
109 cooc2graphWith' doPartitions distance threshold myCooc = do
110 let (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
111 distanceMap `seq` trace "distanceMap OK" diag `seq` trace "diag OK" ti `seq` printDebug "ti done" ()
112
113 --{- -- Debug
114 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
115 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
116 -- printDebug "similarities" similarities
117 --}
118
119 partitions <- if (Map.size distanceMap > 0)
120 then doPartitions distanceMap
121 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
122 , "Maybe you should add more Map Terms in your list"
123 , "Tutorial: link todo"
124 ]
125 partitions `seq` printDebug "partitions done" ()
126 let
127 nodesApprox :: Int
128 nodesApprox = n'
129 where
130 (as, bs) = List.unzip $ Map.keys distanceMap
131 n' = Set.size $ Set.fromList $ as <> bs
132 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
133 confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
134 -- confluence (Map.keys bridgeness') 3 True False
135 seq bridgeness' $ printDebug "bridgeness OK" ()
136 seq confluence' $ printDebug "confluence OK" ()
137 pure $ data2graph ti diag bridgeness' confluence' partitions
138
139
140 doDistanceMap :: Distance
141 -> Threshold
142 -> HashMap (NgramsTerm, NgramsTerm) Int
143 -> ( Map (Int,Int) Double
144 , Map (Index, Index) Int
145 , Map NgramsTerm Index
146 )
147 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
148 where
149 -- TODO remove below
150 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
151 $ Map.fromList
152 $ HashMap.toList myCooc
153
154 (ti, _it) = createIndices theMatrix
155 tiSize = Map.size ti
156
157 similarities = (\m -> m `seq` trace "measure done" m)
158 $ (\m -> m `seq` trace "map2mat done" (measure Distributional m))
159 $ (\m -> m `seq` trace "toIndex done" (map2mat Square 0 tiSize m))
160 $ theMatrix `seq` trace "theMatrix done" (toIndex ti theMatrix)
161
162 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
163
164 distanceMap = Map.fromList . trace "fromList" identity
165 $ List.take links
166 $ List.reverse
167 $ List.sortOn snd
168 $ Map.toList
169 $ edgesFilter
170 $ (\m -> m `seq` trace "map2map done" (Map.filter (> threshold) m))
171 $ similarities `seq` mat2map (trace "similarities done" similarities)
172
173 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
174 where
175 myCooc' = Map.fromList $ HashMap.toList myCooc
176 (ti, _it) = createIndices myCooc'
177
178 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
179
180 distanceMap = toIndex ti
181 $ Map.fromList
182 $ List.take links
183 $ List.reverse
184 $ List.sortOn snd
185 $ HashMap.toList
186 $ HashMap.filter (> threshold)
187 $ conditional myCooc
188
189 ----------------------------------------------------------
190 -- | From data to Graph
191
192 type Occurrences = Int
193
194 data2graph :: ToComId a
195 => Map NgramsTerm Int
196 -> Map (Int, Int) Occurrences
197 -> Map (Int, Int) Double
198 -> Map (Int, Int) Double
199 -> [a]
200 -> Graph
201 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
202 , _graph_edges = edges
203 , _graph_metadata = Nothing
204 }
205 where
206
207 nodes = map (setCoord ForceAtlas labels bridge)
208 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
209 , node_type = Terms -- or Unknown
210 , node_id = cs (show n)
211 , node_label = unNgramsTerm l
212 , node_x_coord = 0
213 , node_y_coord = 0
214 , node_attributes = Attributes { clust_default = fromMaybe 0
215 (Map.lookup n community_id_by_node_id)
216 }
217 , node_children = []
218 }
219 )
220 | (l, n) <- labels
221 , Set.member n nodesWithScores
222 ]
223
224 edges = [ Edge { edge_source = cs (show s)
225 , edge_target = cs (show t)
226 , edge_weight = weight
227 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
228 , edge_id = cs (show i)
229 }
230 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
231 , s /= t
232 , weight > 0
233 ]
234
235 community_id_by_node_id = Map.fromList
236 $ map nodeId2comId partitions
237
238 labels = Map.toList labels'
239
240 nodesWithScores = Set.fromList
241 $ List.concat
242 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
243 $ Map.toList bridge
244
245
246 ------------------------------------------------------------------------
247
248 data Layout = KamadaKawai | ACP | ForceAtlas
249
250
251 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
252 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
253 where
254 (x,y) = f i
255
256
257 -- | ACP
258 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
259 setCoord l labels m (n,node) = node { node_x_coord = x
260 , node_y_coord = y
261 }
262 where
263 (x,y) = getCoord l labels m n
264
265
266 getCoord :: Ord a
267 => Layout
268 -> [(a, Int)]
269 -> Map (Int, Int) Double
270 -> Int
271 -> (Double, Double)
272 getCoord KamadaKawai _ _m _n = undefined -- layout m n
273
274 getCoord ForceAtlas _ _ n = (sin d, cos d)
275 where
276 d = fromIntegral n
277
278 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
279 $ Map.lookup n
280 $ pcaReduceTo (Dimension 2)
281 $ mapArray labels m
282 where
283 to2d :: Vec.Vector Double -> (Double, Double)
284 to2d v = (x',y')
285 where
286 ds = take 2 $ Vec.toList v
287 x' = head' "to2d" ds
288 y' = last' "to2d" ds
289
290 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
291 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
292 where
293 ns = map snd items
294
295 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
296 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
297 ------------------------------------------------------------------------
298
299 -- | KamadaKawai Layout
300 -- TODO TEST: check labels, nodeId and coordinates
301 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
302 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
303 where
304 coord :: (Map Int (Double,Double))
305 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
306 --p = Layout.defaultLGL
307 p = Layout.kamadaKawai
308 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
309
310 -----------------------------------------------------------------------------
311 -- MISC Tools
312 cooc2graph'' :: Ord t => Distance
313 -> Double
314 -> Map (t, t) Int
315 -> Map (Index, Index) Double
316 cooc2graph'' distance threshold myCooc = neighbourMap
317 where
318 (ti, _) = createIndices myCooc
319 myCooc' = toIndex ti myCooc
320 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
321 distanceMat = measure distance matCooc
322 neighbourMap = filterByNeighbours threshold
323 $ mat2map distanceMat
324
325 -- Quentin
326 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
327 filterByNeighbours threshold distanceMap = filteredMap
328 where
329 indexes :: [Index]
330 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
331 filteredMap :: Map (Index, Index) Double
332 filteredMap = Map.fromList
333 $ List.concat
334 $ map (\idx ->
335 let selected = List.reverse
336 $ List.sortOn snd
337 $ Map.toList
338 $ Map.filter (> 0)
339 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
340 in List.take (round threshold) selected
341 ) indexes
342
343
344
345
346