]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
improve performance of logDistributional, upgrade to accelerate 1.3 + lots of debug...
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Debug.Trace
18
19 import Data.Aeson
20 import Data.HashMap.Strict (HashMap)
21 import Data.Map (Map)
22 import Data.Maybe (fromMaybe)
23 import Data.Swagger hiding (items)
24 import GHC.Float (sin, cos)
25 import GHC.Generics (Generic)
26 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
27 import Gargantext.Core.Methods.Distances (Distance(..), measure)
28 import Gargantext.Core.Methods.Distances.Conditional (conditional)
29 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
30 import Gargantext.Core.Statistics
31 import Gargantext.Core.Viz.Graph
32 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
33 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
34 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
35 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
36 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
37 import Gargantext.Prelude
38 import Graph.Types (ClusterNode)
39 import IGraph.Random -- (Gen(..))
40 import Test.QuickCheck (elements)
41 import Test.QuickCheck.Arbitrary
42 import qualified Data.HashMap.Strict as HashMap
43 import qualified Data.List as List
44 import qualified Data.Map as Map
45 import qualified Data.Set as Set
46 import qualified Data.Text as Text
47 import qualified Data.Vector.Storable as Vec
48 import qualified Graph.BAC.ProxemyOptim as BAC
49 import qualified IGraph as Igraph
50 import qualified IGraph.Algorithms.Layout as Layout
51
52
53 data PartitionMethod = Spinglass | Confluence | Infomap
54 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
55 instance FromJSON PartitionMethod
56 instance ToJSON PartitionMethod
57 instance ToSchema PartitionMethod
58 instance Arbitrary PartitionMethod where
59 arbitrary = elements [ minBound .. maxBound ]
60
61
62 -------------------------------------------------------------
63 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
64 -- defaultClustering x = pure $ BAC.defaultClustering x
65 defaultClustering x = spinglass 1 x
66
67 -------------------------------------------------------------
68 type Threshold = Double
69
70
71 cooc2graph' :: Ord t => Distance
72 -> Double
73 -> Map (t, t) Int
74 -> Map (Index, Index) Double
75 cooc2graph' distance threshold myCooc
76 = Map.filter (> threshold)
77 $ mat2map
78 $ measure distance
79 $ case distance of
80 Conditional -> map2mat Triangle 0 tiSize
81 Distributional -> map2mat Square 0 tiSize
82 $ Map.filter (> 1) myCooc'
83
84 where
85 (ti, _) = createIndices myCooc
86 tiSize = Map.size ti
87 myCooc' = toIndex ti myCooc
88
89
90
91 -- coocurrences graph computation
92 cooc2graphWith :: PartitionMethod
93 -> Distance
94 -> Threshold
95 -> HashMap (NgramsTerm, NgramsTerm) Int
96 -> IO Graph
97 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
98 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
99 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
100 -- TODO: change these options, or make them configurable in UI?
101
102
103 cooc2graphWith' :: ToComId a
104 => Partitions a
105 -> Distance
106 -> Threshold
107 -> HashMap (NgramsTerm, NgramsTerm) Int
108 -> IO Graph
109 cooc2graphWith' doPartitions distance threshold myCooc = do
110 let (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
111 distanceMap `seq` trace "distanceMap OK" diag `seq` trace "diag OK" ti `seq` printDebug "ti done" ()
112
113 --{- -- Debug
114 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
115 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
116 -- printDebug "similarities" similarities
117 --}
118
119 partitions <- if (Map.size distanceMap > 0)
120 then doPartitions distanceMap
121 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
122 , "Maybe you should add more Map Terms in your list"
123 , "Tutorial: link todo"
124 ]
125 partitions `seq` printDebug "partitions done" ()
126 let
127 nodesApprox :: Int
128 nodesApprox = n'
129 where
130 (as, bs) = List.unzip $ Map.keys distanceMap
131 n' = Set.size $ Set.fromList $ as <> bs
132 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
133 confluence' = confluence (Map.keys bridgeness') 3 True False
134 seq bridgeness' $ printDebug "bridgeness OK" ()
135 seq confluence' $ printDebug "confluence OK" ()
136 pure $ data2graph ti diag bridgeness' confluence' partitions
137
138
139 doDistanceMap :: Distance
140 -> Threshold
141 -> HashMap (NgramsTerm, NgramsTerm) Int
142 -> ( Map (Int,Int) Double
143 , Map (Index, Index) Int
144 , Map NgramsTerm Index
145 )
146 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
147 where
148 -- TODO remove below
149 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
150 $ Map.fromList
151 $ HashMap.toList myCooc
152
153 (ti, _it) = createIndices theMatrix
154 tiSize = Map.size ti
155
156 similarities = (\m -> m `seq` trace "measure done" m)
157 $ (\m -> m `seq` trace "map2mat done" (measure Distributional m))
158 $ (\m -> m `seq` trace "toIndex done" (map2mat Square 0 tiSize m))
159 $ theMatrix `seq` trace "theMatrix done" (toIndex ti theMatrix)
160
161 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
162
163 distanceMap = Map.fromList . trace "fromList" identity
164 $ List.take links
165 $ List.reverse
166 $ List.sortOn snd
167 $ Map.toList
168 $ edgesFilter
169 $ (\m -> m `seq` trace "map2map done" (Map.filter (> threshold) m))
170 $ similarities `seq` mat2map (trace "similarities done" similarities)
171
172 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
173 where
174 myCooc' = Map.fromList $ HashMap.toList myCooc
175 (ti, _it) = createIndices myCooc'
176
177 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
178
179 distanceMap = toIndex ti
180 $ Map.fromList
181 $ List.take links
182 $ List.reverse
183 $ List.sortOn snd
184 $ HashMap.toList
185 $ HashMap.filter (> threshold)
186 $ conditional myCooc
187
188 ----------------------------------------------------------
189 -- | From data to Graph
190
191 type Occurrences = Int
192
193 data2graph :: ToComId a
194 => Map NgramsTerm Int
195 -> Map (Int, Int) Occurrences
196 -> Map (Int, Int) Double
197 -> Map (Int, Int) Double
198 -> [a]
199 -> Graph
200 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
201 , _graph_edges = edges
202 , _graph_metadata = Nothing
203 }
204 where
205
206 nodes = map (setCoord ForceAtlas labels bridge)
207 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
208 , node_type = Terms -- or Unknown
209 , node_id = cs (show n)
210 , node_label = unNgramsTerm l
211 , node_x_coord = 0
212 , node_y_coord = 0
213 , node_attributes = Attributes { clust_default = fromMaybe 0
214 (Map.lookup n community_id_by_node_id)
215 }
216 , node_children = []
217 }
218 )
219 | (l, n) <- labels
220 , Set.member n nodesWithScores
221 ]
222
223 edges = [ Edge { edge_source = cs (show s)
224 , edge_target = cs (show t)
225 , edge_weight = weight
226 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
227 , edge_id = cs (show i)
228 }
229 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
230 , s /= t
231 , weight > 0
232 ]
233
234 community_id_by_node_id = Map.fromList
235 $ map nodeId2comId partitions
236
237 labels = Map.toList labels'
238
239 nodesWithScores = Set.fromList
240 $ List.concat
241 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
242 $ Map.toList bridge
243
244
245 ------------------------------------------------------------------------
246
247 data Layout = KamadaKawai | ACP | ForceAtlas
248
249
250 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
251 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
252 where
253 (x,y) = f i
254
255
256 -- | ACP
257 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
258 setCoord l labels m (n,node) = node { node_x_coord = x
259 , node_y_coord = y
260 }
261 where
262 (x,y) = getCoord l labels m n
263
264
265 getCoord :: Ord a
266 => Layout
267 -> [(a, Int)]
268 -> Map (Int, Int) Double
269 -> Int
270 -> (Double, Double)
271 getCoord KamadaKawai _ _m _n = undefined -- layout m n
272
273 getCoord ForceAtlas _ _ n = (sin d, cos d)
274 where
275 d = fromIntegral n
276
277 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
278 $ Map.lookup n
279 $ pcaReduceTo (Dimension 2)
280 $ mapArray labels m
281 where
282 to2d :: Vec.Vector Double -> (Double, Double)
283 to2d v = (x',y')
284 where
285 ds = take 2 $ Vec.toList v
286 x' = head' "to2d" ds
287 y' = last' "to2d" ds
288
289 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
290 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
291 where
292 ns = map snd items
293
294 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
295 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
296 ------------------------------------------------------------------------
297
298 -- | KamadaKawai Layout
299 -- TODO TEST: check labels, nodeId and coordinates
300 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
301 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
302 where
303 coord :: (Map Int (Double,Double))
304 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
305 --p = Layout.defaultLGL
306 p = Layout.kamadaKawai
307 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
308
309 -----------------------------------------------------------------------------
310 -- MISC Tools
311 cooc2graph'' :: Ord t => Distance
312 -> Double
313 -> Map (t, t) Int
314 -> Map (Index, Index) Double
315 cooc2graph'' distance threshold myCooc = neighbourMap
316 where
317 (ti, _) = createIndices myCooc
318 myCooc' = toIndex ti myCooc
319 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
320 distanceMat = measure distance matCooc
321 neighbourMap = filterByNeighbours threshold
322 $ mat2map distanceMat
323
324 -- Quentin
325 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
326 filterByNeighbours threshold distanceMap = filteredMap
327 where
328 indexes :: [Index]
329 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
330 filteredMap :: Map (Index, Index) Double
331 filteredMap = Map.fromList
332 $ List.concat
333 $ map (\idx ->
334 let selected = List.reverse
335 $ List.sortOn snd
336 $ Map.toList
337 $ Map.filter (> 0)
338 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
339 in List.take (round threshold) selected
340 ) indexes
341
342
343
344
345