]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[WIP] bridgeness optim
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness3, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51
52 data PartitionMethod = Spinglass | Confluence | Infomap
53 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
54 instance FromJSON PartitionMethod
55 instance ToJSON PartitionMethod
56 instance ToSchema PartitionMethod
57 instance Arbitrary PartitionMethod where
58 arbitrary = elements [ minBound .. maxBound ]
59
60
61 -------------------------------------------------------------
62 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
63 -- defaultClustering x = pure $ BAC.defaultClustering x
64 defaultClustering x = spinglass 1 x
65
66 -------------------------------------------------------------
67 type Threshold = Double
68
69
70 cooc2graph' :: Ord t => Similarity
71 -> Double
72 -> Map (t, t) Int
73 -> Map (Index, Index) Double
74 cooc2graph' distance threshold myCooc
75 = Map.filter (> threshold)
76 $ mat2map
77 $ measure distance
78 $ case distance of
79 Conditional -> map2mat Triangle 0 tiSize
80 Distributional -> map2mat Square 0 tiSize
81 $ Map.filter (> 1) myCooc'
82
83 where
84 (ti, _) = createIndices myCooc
85 tiSize = Map.size ti
86 myCooc' = toIndex ti myCooc
87
88
89
90 -- coocurrences graph computation
91 cooc2graphWith :: PartitionMethod
92 -> MultiPartite
93 -> Similarity
94 -> Threshold
95 -> Strength
96 -> HashMap (NgramsTerm, NgramsTerm) Int
97 -> IO Graph
98 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
99 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
100 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
101 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
102 -- TODO: change these options, or make them configurable in UI?
103
104
105 cooc2graphWith' :: ToComId a
106 => Partitions a
107 -> MultiPartite
108 -> Similarity
109 -> Threshold
110 -> Strength
111 -> HashMap (NgramsTerm, NgramsTerm) Int
112 -> IO Graph
113 cooc2graphWith' doPartitions multi similarity threshold strength myCooc = do
114 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
115 distanceMap `seq` diag `seq` ti `seq` return ()
116
117 --{- -- Debug
118 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
119 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
120 -- printDebug "similarities" similarities
121 --}
122
123 partitions <- if (Map.size distanceMap > 0)
124 then doPartitions distanceMap
125 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
126 , "Maybe you should add more Map Terms in your list"
127 , "Tutorial: link todo"
128 ]
129 length partitions `seq` return ()
130
131 let
132 !confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
133 !bridgeness' = bridgeness3 confluence' distanceMap
134 pure $ data2graph multi ti diag bridgeness' confluence' partitions
135
136 type Reverse = Bool
137
138 doSimilarityMap :: Similarity
139 -> Threshold
140 -> Strength
141 -> HashMap (NgramsTerm, NgramsTerm) Int
142 -> ( Map (Int,Int) Double
143 , Map (Index, Index) Int
144 , Map NgramsTerm Index
145 )
146 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
147 where
148 -- TODO remove below
149 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
150 $ Map.fromList
151 $ HashMap.toList myCooc
152
153 (ti, _it) = createIndices theMatrix
154 tiSize = Map.size ti
155
156 similarities = (\m -> m `seq` m)
157 $ (\m -> m `seq` measure Distributional m)
158 $ (\m -> m `seq` map2mat Square 0 tiSize m)
159 $ theMatrix `seq` toIndex ti theMatrix
160
161 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
162
163 distanceMap = Map.fromList
164 $ List.take links
165 $ (if strength == Weak then List.reverse else identity)
166 $ List.sortOn snd
167 $ Map.toList
168 $ edgesFilter
169 $ (\m -> m `seq` Map.filter (> threshold) m)
170 $ similarities `seq` mat2map similarities
171
172 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
173 where
174 myCooc' = Map.fromList $ HashMap.toList myCooc
175 (ti, _it) = createIndices myCooc'
176 links = round (let n :: Double = fromIntegral (Map.size ti) in n * (log n)^(2::Int))
177 distanceMap = toIndex ti
178 $ Map.fromList
179 $ List.take links
180 $ (if strength == Weak then List.reverse else identity)
181 $ List.sortOn snd
182 $ HashMap.toList
183 $ HashMap.filter (> threshold)
184 $ conditional myCooc
185
186 ----------------------------------------------------------
187 -- | From data to Graph
188 type Occurrences = Int
189
190 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
191 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
192 if HashSet.member t s1
193 then t1
194 else t2
195
196
197 data2graph :: ToComId a
198 => MultiPartite
199 -> Map NgramsTerm Int
200 -> Map (Int, Int) Occurrences
201 -> Map (Int, Int) Double
202 -> Map (Int, Int) Double
203 -> [a]
204 -> Graph
205 data2graph multi labels' occurences bridge conf partitions =
206 Graph { _graph_nodes = nodes
207 , _graph_edges = edges
208 , _graph_metadata = Nothing
209 }
210
211 where
212
213 nodes = map (setCoord ForceAtlas labels bridge)
214 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
215 , node_type = nodeTypeWith multi label
216 , node_id = (cs . show) n
217 , node_label = unNgramsTerm label
218 , node_x_coord = 0
219 , node_y_coord = 0
220 , node_attributes =
221 Attributes { clust_default = fromMaybe 0
222 (Map.lookup n community_id_by_node_id)
223 }
224 , node_children = []
225 }
226 )
227 | (label, n) <- labels
228 , Set.member n toKeep
229 ]
230
231 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
232
233 edges = [ Edge { edge_source = cs (show s)
234 , edge_target = cs (show t)
235 , edge_weight = weight
236 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
237 , edge_id = cs (show i)
238 }
239 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
240 , s /= t
241 , weight > 0
242 ]
243
244 community_id_by_node_id = Map.fromList
245 $ map nodeId2comId partitions
246
247 labels = Map.toList labels'
248
249
250 ------------------------------------------------------------------------
251
252 data Layout = KamadaKawai | ACP | ForceAtlas
253
254
255 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
256 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
257 where
258 (x,y) = f i
259
260
261 -- | ACP
262 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
263 setCoord l labels m (n,node) = node { node_x_coord = x
264 , node_y_coord = y
265 }
266 where
267 (x,y) = getCoord l labels m n
268
269
270 getCoord :: Ord a
271 => Layout
272 -> [(a, Int)]
273 -> Map (Int, Int) Double
274 -> Int
275 -> (Double, Double)
276 getCoord KamadaKawai _ _m _n = undefined -- layout m n
277
278 getCoord ForceAtlas _ _ n = (sin d, cos d)
279 where
280 d = fromIntegral n
281
282 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
283 $ Map.lookup n
284 $ pcaReduceTo (Dimension 2)
285 $ mapArray labels m
286 where
287 to2d :: Vec.Vector Double -> (Double, Double)
288 to2d v = (x',y')
289 where
290 ds = take 2 $ Vec.toList v
291 x' = head' "to2d" ds
292 y' = last' "to2d" ds
293
294 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
295 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
296 where
297 ns = map snd items
298
299 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
300 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
301 ------------------------------------------------------------------------
302
303 -- | KamadaKawai Layout
304 -- TODO TEST: check labels, nodeId and coordinates
305 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
306 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
307 where
308 coord :: (Map Int (Double,Double))
309 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
310 --p = Layout.defaultLGL
311 p = Layout.kamadaKawai
312 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
313
314 -----------------------------------------------------------------------------
315 -- MISC Tools
316 cooc2graph'' :: Ord t => Similarity
317 -> Double
318 -> Map (t, t) Int
319 -> Map (Index, Index) Double
320 cooc2graph'' distance threshold myCooc = neighbourMap
321 where
322 (ti, _) = createIndices myCooc
323 myCooc' = toIndex ti myCooc
324 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
325 distanceMat = measure distance matCooc
326 neighbourMap = filterByNeighbours threshold
327 $ mat2map distanceMat
328
329 -- Quentin
330 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
331 filterByNeighbours threshold distanceMap = filteredMap
332 where
333 indexes :: [Index]
334 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
335 filteredMap :: Map (Index, Index) Double
336 filteredMap = Map.fromList
337 $ List.concat
338 $ map (\idx ->
339 let selected = List.reverse
340 $ List.sortOn snd
341 $ Map.toList
342 $ Map.filter (> 0)
343 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
344 in List.take (round threshold) selected
345 ) indexes
346