]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[MERGE]
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, recursiveClustering, recursiveClustering', setNodes2clusterNodes)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
58
59 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
60 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
61 instance FromJSON BridgenessMethod
62 instance ToJSON BridgenessMethod
63 instance ToSchema BridgenessMethod
64 instance Arbitrary BridgenessMethod where
65 arbitrary = elements [ minBound .. maxBound ]
66
67
68 -------------------------------------------------------------
69 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
70 -- defaultClustering x = pure $ BAC.defaultClustering x
71 defaultClustering x = spinglass 1 x
72
73 -------------------------------------------------------------
74 type Threshold = Double
75
76
77 cooc2graph' :: Ord t => Similarity
78 -> Double
79 -> Map (t, t) Int
80 -> Map (Index, Index) Double
81 cooc2graph' distance threshold myCooc
82 = Map.filter (> threshold)
83 $ mat2map
84 $ measure distance
85 $ case distance of
86 Conditional -> map2mat Triangle 0 tiSize
87 Distributional -> map2mat Square 0 tiSize
88 $ Map.filter (> 1) myCooc'
89
90 where
91 (ti, _) = createIndices myCooc
92 tiSize = Map.size ti
93 myCooc' = toIndex ti myCooc
94
95
96
97 -- coocurrences graph computation
98 cooc2graphWith :: PartitionMethod
99 -> BridgenessMethod
100 -> MultiPartite
101 -> Similarity
102 -> Threshold
103 -> Strength
104 -> HashMap (NgramsTerm, NgramsTerm) Int
105 -> IO Graph
106 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
107 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
108 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
109 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
110 -- TODO: change these options, or make them configurable in UI?
111
112 cooc2graphWith' :: Partitions
113 -> BridgenessMethod
114 -> MultiPartite
115 -> Similarity
116 -> Threshold
117 -> Strength
118 -> HashMap (NgramsTerm, NgramsTerm) Int
119 -> IO Graph
120 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Conditional threshold strength myCooc = do
121 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
122 distanceMap `seq` diag `seq` ti `seq` return ()
123
124 partitions <- if (Map.size distanceMap > 0)
125 -- then recursiveClustering doPartitions distanceMap
126 then recursiveClustering' (spinglass' 1) distanceMap
127 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
128 , "Maybe you should add more Map Terms in your list"
129 , "Tutorial: TODO"
130 ]
131 length partitions `seq` return ()
132
133 let
134 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
135 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0) distanceMap
136 {-
137 !bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
138 then bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
139 else bridgeness (Bridgeness_Advanced similarity confluence') distanceMap
140 -}
141 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
142
143 cooc2graphWith' doPartitions bridgenessMethod multi Distributional threshold strength myCooc = do
144 let (distanceMap, diag, ti) = doSimilarityMap Distributional threshold strength myCooc
145 distanceMap `seq` diag `seq` ti `seq` return ()
146
147 partitions <- if (Map.size distanceMap > 0)
148 then recursiveClustering doPartitions distanceMap
149 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
150 , "Maybe you should add more Map Terms in your list"
151 , "Tutorial: TODO"
152 ]
153 length partitions `seq` return ()
154
155 let
156 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
157 !bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
158 then bridgeness (Bridgeness_Basic partitions 10.0) distanceMap
159 else bridgeness (Bridgeness_Advanced Distributional confluence') distanceMap
160
161 pure $ data2graph multi ti diag bridgeness' confluence' partitions
162
163
164
165
166
167
168 type Reverse = Bool
169
170 doSimilarityMap :: Similarity
171 -> Threshold
172 -> Strength
173 -> HashMap (NgramsTerm, NgramsTerm) Int
174 -> ( Map (Int,Int) Double
175 , Map (Index, Index) Int
176 , Map NgramsTerm Index
177 )
178
179 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
180 where
181 myCooc' = Map.fromList $ HashMap.toList myCooc
182
183 (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
184 $ Map.fromList
185 $ HashMap.toList myCooc
186
187 (ti, _it) = createIndices theMatrix
188 tiSize = Map.size ti
189
190 similarities = (\m -> m `seq` m)
191 $ (\m -> m `seq` measure Conditional m)
192 $ (\m -> m `seq` map2mat Square 0 tiSize m)
193 $ theMatrix `seq` toIndex ti theMatrix
194
195 links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
196 distanceMap = Map.fromList
197 $ List.take links
198 $ (if strength == Weak then List.reverse else identity)
199 $ List.sortOn snd
200 $ Map.toList
201 $ Map.filter (> threshold)
202 -- $ conditional myCooc
203 $ similarities `seq` mat2map similarities
204
205 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
206 where
207 -- TODO remove below
208 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
209 $ Map.fromList
210 $ HashMap.toList myCooc
211
212 (ti, _it) = createIndices theMatrix
213 tiSize = Map.size ti
214
215 similarities = (\m -> m `seq` m)
216 $ (\m -> m `seq` measure Distributional m)
217 $ (\m -> m `seq` map2mat Square 0 tiSize m)
218 $ theMatrix `seq` toIndex ti theMatrix
219
220 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
221
222 distanceMap = Map.fromList
223 $ List.take links
224 $ (if strength == Weak then List.reverse else identity)
225 $ List.sortOn snd
226 $ Map.toList
227 $ edgesFilter
228 $ (\m -> m `seq` Map.filter (> threshold) m)
229 $ similarities `seq` mat2map similarities
230
231 ----------------------------------------------------------
232 -- | From data to Graph
233 type Occurrences = Int
234
235 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
236 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
237 if HashSet.member t s1
238 then t1
239 else t2
240
241
242 data2graph :: MultiPartite
243 -> Map NgramsTerm Int
244 -> Map (Int, Int) Occurrences
245 -> Map (Int, Int) Double
246 -> Map (Int, Int) Double
247 -> [ClusterNode]
248 -> Graph
249 data2graph multi labels' occurences bridge conf partitions =
250 Graph { _graph_nodes = nodes
251 , _graph_edges = edges
252 , _graph_metadata = Nothing
253 }
254
255 where
256
257 nodes = map (setCoord ForceAtlas labels bridge)
258 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
259 , node_type = nodeTypeWith multi label
260 , node_id = (cs . show) n
261 , node_label = unNgramsTerm label
262 , node_x_coord = 0
263 , node_y_coord = 0
264 , node_attributes =
265 Attributes { clust_default = fromMaybe 0
266 (Map.lookup n community_id_by_node_id)
267 }
268 , node_children = []
269 }
270 )
271 | (label, n) <- labels
272 , Set.member n toKeep
273 ]
274
275 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
276
277 edges = [ Edge { edge_source = cs (show s)
278 , edge_hidden = Nothing
279 , edge_target = cs (show t)
280 , edge_weight = weight
281 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
282 , edge_id = cs (show i)
283 }
284 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
285 , s /= t
286 , weight > 0
287 ]
288
289 community_id_by_node_id = Map.fromList
290 $ map nodeId2comId partitions
291
292 labels = Map.toList labels'
293
294
295 ------------------------------------------------------------------------
296
297 data Layout = KamadaKawai | ACP | ForceAtlas
298
299
300 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
301 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
302 where
303 (x,y) = f i
304
305
306 -- | ACP
307 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
308 setCoord l labels m (n,node) = node { node_x_coord = x
309 , node_y_coord = y
310 }
311 where
312 (x,y) = getCoord l labels m n
313
314
315 getCoord :: Ord a
316 => Layout
317 -> [(a, Int)]
318 -> Map (Int, Int) Double
319 -> Int
320 -> (Double, Double)
321 getCoord KamadaKawai _ _m _n = undefined -- layout m n
322
323 getCoord ForceAtlas _ _ n = (sin d, cos d)
324 where
325 d = fromIntegral n
326
327 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
328 $ Map.lookup n
329 $ pcaReduceTo (Dimension 2)
330 $ mapArray labels m
331 where
332 to2d :: Vec.Vector Double -> (Double, Double)
333 to2d v = (x',y')
334 where
335 ds = take 2 $ Vec.toList v
336 x' = head' "to2d" ds
337 y' = last' "to2d" ds
338
339 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
340 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
341 where
342 ns = map snd items
343
344 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
345 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
346 ------------------------------------------------------------------------
347
348 -- | KamadaKawai Layout
349 -- TODO TEST: check labels, nodeId and coordinates
350 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
351 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
352 where
353 coord :: (Map Int (Double,Double))
354 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
355 --p = Layout.defaultLGL
356 p = Layout.kamadaKawai
357 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
358
359 -----------------------------------------------------------------------------
360 -- MISC Tools
361 cooc2graph'' :: Ord t => Similarity
362 -> Double
363 -> Map (t, t) Int
364 -> Map (Index, Index) Double
365 cooc2graph'' distance threshold myCooc = neighbourMap
366 where
367 (ti, _) = createIndices myCooc
368 myCooc' = toIndex ti myCooc
369 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
370 distanceMat = measure distance matCooc
371 neighbourMap = filterByNeighbours threshold
372 $ mat2map distanceMat
373
374 -- Quentin
375 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
376 filterByNeighbours threshold distanceMap = filteredMap
377 where
378 indexes :: [Index]
379 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
380 filteredMap :: Map (Index, Index) Double
381 filteredMap = Map.fromList
382 $ List.concat
383 $ map (\idx ->
384 let selected = List.reverse
385 $ List.sortOn snd
386 $ Map.toList
387 $ Map.filter (> 0)
388 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
389 in List.take (round threshold) selected
390 ) indexes