]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[MERGE]
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId{-, recursiveClustering-}, recursiveClustering', setNodes2clusterNodes)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
58
59 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
60 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
61 instance FromJSON BridgenessMethod
62 instance ToJSON BridgenessMethod
63 instance ToSchema BridgenessMethod
64 instance Arbitrary BridgenessMethod where
65 arbitrary = elements [ minBound .. maxBound ]
66
67
68 -------------------------------------------------------------
69 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
70 -- defaultClustering x = pure $ BAC.defaultClustering x
71 defaultClustering x = spinglass 1 x
72
73 -------------------------------------------------------------
74 type Threshold = Double
75
76
77 cooc2graph' :: Ord t => Similarity
78 -> Double
79 -> Map (t, t) Int
80 -> Map (Index, Index) Double
81 cooc2graph' distance threshold myCooc
82 = Map.filter (> threshold)
83 $ mat2map
84 $ measure distance
85 $ case distance of
86 Conditional -> map2mat Triangle 0 tiSize
87 Distributional -> map2mat Square 0 tiSize
88 $ Map.filter (> 1) myCooc'
89
90 where
91 (ti, _) = createIndices myCooc
92 tiSize = Map.size ti
93 myCooc' = toIndex ti myCooc
94
95
96
97 -- coocurrences graph computation
98 cooc2graphWith :: PartitionMethod
99 -> BridgenessMethod
100 -> MultiPartite
101 -> Similarity
102 -> Threshold
103 -> Strength
104 -> HashMap (NgramsTerm, NgramsTerm) Int
105 -> IO Graph
106 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
107 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
108 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
109 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
110 -- TODO: change these options, or make them configurable in UI?
111
112 cooc2graphWith' :: Partitions
113 -> BridgenessMethod
114 -> MultiPartite
115 -> Similarity
116 -> Threshold
117 -> Strength
118 -> HashMap (NgramsTerm, NgramsTerm) Int
119 -> IO Graph
120 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Conditional threshold strength myCooc = do
121 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
122 distanceMap `seq` diag `seq` ti `seq` return ()
123
124 partitions <- if (Map.size distanceMap > 0)
125 -- then recursiveClustering doPartitions distanceMap
126 then recursiveClustering' (spinglass' 1) distanceMap
127 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
128 , "Maybe you should add more Map Terms in your list"
129 , "Tutorial: TODO"
130 ]
131 length partitions `seq` return ()
132
133 let
134 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
135 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap
136 {-
137 !bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
138 then bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
139 else bridgeness (Bridgeness_Advanced similarity confluence') distanceMap
140 -}
141 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
142
143 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Distributional threshold strength myCooc = do
144 let (distanceMap, diag, ti) = doSimilarityMap Distributional threshold strength myCooc
145 distanceMap `seq` diag `seq` ti `seq` return ()
146
147 partitions <- if (Map.size distanceMap > 0)
148 --then recursiveClustering doPartitions distanceMap
149 then recursiveClustering' (spinglass' 1) distanceMap
150 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
151 , "Maybe you should add more Map Terms in your list"
152 , "Tutorial: TODO"
153 ]
154 length partitions `seq` return ()
155
156 let
157 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
158 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap
159 {-
160 !bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
161 then bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
162 else bridgeness (Bridgeness_Advanced Distributional confluence') distanceMap
163 pure $ data2graph multi ti diag bridgeness' confluence' partitions
164 -}
165 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
166
167
168
169 type Reverse = Bool
170
171 doSimilarityMap :: Similarity
172 -> Threshold
173 -> Strength
174 -> HashMap (NgramsTerm, NgramsTerm) Int
175 -> ( Map (Int,Int) Double
176 , Map (Index, Index) Int
177 , Map NgramsTerm Index
178 )
179
180 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
181 where
182 myCooc' = Map.fromList $ HashMap.toList myCooc
183
184 (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
185 $ Map.fromList
186 $ HashMap.toList myCooc
187
188 (ti, _it) = createIndices theMatrix
189 tiSize = Map.size ti
190
191 similarities = (\m -> m `seq` m)
192 $ (\m -> m `seq` measure Conditional m)
193 $ (\m -> m `seq` map2mat Square 0 tiSize m)
194 $ theMatrix `seq` toIndex ti theMatrix
195
196 links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
197 distanceMap = Map.fromList
198 $ List.take links
199 $ (if strength == Weak then List.reverse else identity)
200 $ List.sortOn snd
201 $ Map.toList
202 $ Map.filter (> threshold)
203 -- $ conditional myCooc
204 $ similarities `seq` mat2map similarities
205
206 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
207 where
208 -- TODO remove below
209 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
210 $ Map.fromList
211 $ HashMap.toList myCooc
212
213 (ti, _it) = createIndices theMatrix
214 tiSize = Map.size ti
215
216 similarities = (\m -> m `seq` m)
217 $ (\m -> m `seq` measure Distributional m)
218 $ (\m -> m `seq` map2mat Square 0 tiSize m)
219 $ theMatrix `seq` toIndex ti theMatrix
220
221 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
222
223 distanceMap = Map.fromList
224 $ List.take links
225 $ (if strength == Weak then List.reverse else identity)
226 $ List.sortOn snd
227 $ Map.toList
228 $ edgesFilter
229 $ (\m -> m `seq` Map.filter (> threshold) m)
230 $ similarities `seq` mat2map similarities
231
232 ----------------------------------------------------------
233 -- | From data to Graph
234 type Occurrences = Int
235
236 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
237 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
238 if HashSet.member t s1
239 then t1
240 else t2
241
242
243 data2graph :: MultiPartite
244 -> Map NgramsTerm Int
245 -> Map (Int, Int) Occurrences
246 -> Map (Int, Int) Double
247 -> Map (Int, Int) Double
248 -> [ClusterNode]
249 -> Graph
250 data2graph multi labels' occurences bridge conf partitions =
251 Graph { _graph_nodes = nodes
252 , _graph_edges = edges
253 , _graph_metadata = Nothing
254 }
255
256 where
257
258 nodes = map (setCoord ForceAtlas labels bridge)
259 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
260 , node_type = nodeTypeWith multi label
261 , node_id = (cs . show) n
262 , node_label = unNgramsTerm label
263 , node_x_coord = 0
264 , node_y_coord = 0
265 , node_attributes =
266 Attributes { clust_default = fromMaybe 0
267 (Map.lookup n community_id_by_node_id)
268 }
269 , node_children = []
270 }
271 )
272 | (label, n) <- labels
273 , Set.member n toKeep
274 ]
275
276 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
277
278 edges = [ Edge { edge_source = cs (show s)
279 , edge_hidden = Nothing
280 , edge_target = cs (show t)
281 , edge_weight = weight
282 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
283 , edge_id = cs (show i)
284 }
285 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
286 , s /= t
287 , weight > 0
288 ]
289
290 community_id_by_node_id = Map.fromList
291 $ map nodeId2comId partitions
292
293 labels = Map.toList labels'
294
295
296 ------------------------------------------------------------------------
297
298 data Layout = KamadaKawai | ACP | ForceAtlas
299
300
301 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
302 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
303 where
304 (x,y) = f i
305
306
307 -- | ACP
308 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
309 setCoord l labels m (n,node) = node { node_x_coord = x
310 , node_y_coord = y
311 }
312 where
313 (x,y) = getCoord l labels m n
314
315
316 getCoord :: Ord a
317 => Layout
318 -> [(a, Int)]
319 -> Map (Int, Int) Double
320 -> Int
321 -> (Double, Double)
322 getCoord KamadaKawai _ _m _n = undefined -- layout m n
323
324 getCoord ForceAtlas _ _ n = (sin d, cos d)
325 where
326 d = fromIntegral n
327
328 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
329 $ Map.lookup n
330 $ pcaReduceTo (Dimension 2)
331 $ mapArray labels m
332 where
333 to2d :: Vec.Vector Double -> (Double, Double)
334 to2d v = (x',y')
335 where
336 ds = take 2 $ Vec.toList v
337 x' = head' "to2d" ds
338 y' = last' "to2d" ds
339
340 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
341 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
342 where
343 ns = map snd items
344
345 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
346 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
347 ------------------------------------------------------------------------
348
349 -- | KamadaKawai Layout
350 -- TODO TEST: check labels, nodeId and coordinates
351 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
352 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
353 where
354 coord :: (Map Int (Double,Double))
355 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
356 --p = Layout.defaultLGL
357 p = Layout.kamadaKawai
358 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
359
360 -----------------------------------------------------------------------------
361 -- MISC Tools
362 cooc2graph'' :: Ord t => Similarity
363 -> Double
364 -> Map (t, t) Int
365 -> Map (Index, Index) Double
366 cooc2graph'' distance threshold myCooc = neighbourMap
367 where
368 (ti, _) = createIndices myCooc
369 myCooc' = toIndex ti myCooc
370 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
371 distanceMat = measure distance matCooc
372 neighbourMap = filterByNeighbours threshold
373 $ mat2map distanceMat
374
375 -- Quentin
376 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
377 filterByNeighbours threshold distanceMap = filteredMap
378 where
379 indexes :: [Index]
380 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
381 filteredMap :: Map (Index, Index) Double
382 filteredMap = Map.fromList
383 $ List.concat
384 $ map (\idx ->
385 let selected = List.reverse
386 $ List.sortOn snd
387 $ Map.toList
388 $ Map.filter (> 0)
389 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
390 in List.take (round threshold) selected
391 ) indexes