]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[FEAT] Graph Multipartite connected, need to change the node shape in Graph and tests
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51
52 data PartitionMethod = Spinglass | Confluence | Infomap
53 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
54 instance FromJSON PartitionMethod
55 instance ToJSON PartitionMethod
56 instance ToSchema PartitionMethod
57 instance Arbitrary PartitionMethod where
58 arbitrary = elements [ minBound .. maxBound ]
59
60
61 -------------------------------------------------------------
62 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
63 -- defaultClustering x = pure $ BAC.defaultClustering x
64 defaultClustering x = spinglass 1 x
65
66 -------------------------------------------------------------
67 type Threshold = Double
68
69
70 cooc2graph' :: Ord t => Similarity
71 -> Double
72 -> Map (t, t) Int
73 -> Map (Index, Index) Double
74 cooc2graph' distance threshold myCooc
75 = Map.filter (> threshold)
76 $ mat2map
77 $ measure distance
78 $ case distance of
79 Conditional -> map2mat Triangle 0 tiSize
80 Distributional -> map2mat Square 0 tiSize
81 $ Map.filter (> 1) myCooc'
82
83 where
84 (ti, _) = createIndices myCooc
85 tiSize = Map.size ti
86 myCooc' = toIndex ti myCooc
87
88
89
90 -- coocurrences graph computation
91 cooc2graphWith :: PartitionMethod
92 -> MultiPartite
93 -> Similarity
94 -> Threshold
95 -> Strength
96 -> HashMap (NgramsTerm, NgramsTerm) Int
97 -> IO Graph
98 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
99 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
100 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
101 -- TODO: change these options, or make them configurable in UI?
102
103
104 cooc2graphWith' :: ToComId a
105 => Partitions a
106 -> MultiPartite
107 -> Similarity
108 -> Threshold
109 -> Strength
110 -> HashMap (NgramsTerm, NgramsTerm) Int
111 -> IO Graph
112 cooc2graphWith' doPartitions multi similarity threshold strength myCooc = do
113 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
114 distanceMap `seq` diag `seq` ti `seq` return ()
115
116 --{- -- Debug
117 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
118 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
119 -- printDebug "similarities" similarities
120 --}
121
122 partitions <- if (Map.size distanceMap > 0)
123 then doPartitions distanceMap
124 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
125 , "Maybe you should add more Map Terms in your list"
126 , "Tutorial: link todo"
127 ]
128 length partitions `seq` return ()
129 let
130 nodesApprox :: Int
131 nodesApprox = n'
132 where
133 (as, bs) = List.unzip $ Map.keys distanceMap
134 n' = Set.size $ Set.fromList $ as <> bs
135 !bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
136 !confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
137 pure $ data2graph multi ti diag bridgeness' confluence' partitions
138
139 type Reverse = Bool
140
141 doSimilarityMap :: Similarity
142 -> Threshold
143 -> Strength
144 -> HashMap (NgramsTerm, NgramsTerm) Int
145 -> ( Map (Int,Int) Double
146 , Map (Index, Index) Int
147 , Map NgramsTerm Index
148 )
149 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
150 where
151 -- TODO remove below
152 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
153 $ Map.fromList
154 $ HashMap.toList myCooc
155
156 (ti, _it) = createIndices theMatrix
157 tiSize = Map.size ti
158
159 similarities = (\m -> m `seq` m)
160 $ (\m -> m `seq` measure Distributional m)
161 $ (\m -> m `seq` map2mat Square 0 tiSize m)
162 $ theMatrix `seq` toIndex ti theMatrix
163
164 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
165
166 distanceMap = Map.fromList
167 $ List.take links
168 $ (if strength == Weak then List.reverse else identity)
169 $ List.sortOn snd
170 $ Map.toList
171 $ edgesFilter
172 $ (\m -> m `seq` Map.filter (> threshold) m)
173 $ similarities `seq` mat2map similarities
174
175 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
176 where
177 myCooc' = Map.fromList $ HashMap.toList myCooc
178 (ti, _it) = createIndices myCooc'
179 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
180 distanceMap = toIndex ti
181 $ Map.fromList
182 $ List.take links
183 $ (if strength == Weak then List.reverse else identity)
184 $ List.sortOn snd
185 $ HashMap.toList
186 $ HashMap.filter (> threshold)
187 $ conditional myCooc
188
189 ----------------------------------------------------------
190 -- | From data to Graph
191
192 type Occurrences = Int
193
194 multiPartiteWith :: MultiPartite -> NgramsTerm -> TypeNode
195 multiPartiteWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
196 if HashSet.member t s1
197 then typeNode t1
198 else typeNode t2
199
200 typeNode :: NgramsType -> TypeNode
201 typeNode NgramsTerms = Terms
202 typeNode _ = Unknown
203
204
205 data2graph :: ToComId a
206 => MultiPartite
207 -> Map NgramsTerm Int
208 -> Map (Int, Int) Occurrences
209 -> Map (Int, Int) Double
210 -> Map (Int, Int) Double
211 -> [a]
212 -> Graph
213 data2graph multi labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
214 , _graph_edges = edges
215 , _graph_metadata = Nothing
216 }
217 where
218
219 nodes = map (setCoord ForceAtlas labels bridge)
220 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
221 , node_type = multiPartiteWith multi l
222 , node_id = cs (show n)
223 , node_label = unNgramsTerm l
224 , node_x_coord = 0
225 , node_y_coord = 0
226 , node_attributes = Attributes { clust_default = fromMaybe 0
227 (Map.lookup n community_id_by_node_id)
228 }
229 , node_children = []
230 }
231 )
232 | (l, n) <- labels
233 , Set.member n toKeep
234 ]
235
236 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
237
238 edges = [ Edge { edge_source = cs (show s)
239 , edge_target = cs (show t)
240 , edge_weight = weight
241 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
242 , edge_id = cs (show i)
243 }
244 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
245 , s /= t
246 , weight > 0
247 ]
248
249 community_id_by_node_id = Map.fromList
250 $ map nodeId2comId partitions
251
252 labels = Map.toList labels'
253
254
255 ------------------------------------------------------------------------
256
257 data Layout = KamadaKawai | ACP | ForceAtlas
258
259
260 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
261 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
262 where
263 (x,y) = f i
264
265
266 -- | ACP
267 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
268 setCoord l labels m (n,node) = node { node_x_coord = x
269 , node_y_coord = y
270 }
271 where
272 (x,y) = getCoord l labels m n
273
274
275 getCoord :: Ord a
276 => Layout
277 -> [(a, Int)]
278 -> Map (Int, Int) Double
279 -> Int
280 -> (Double, Double)
281 getCoord KamadaKawai _ _m _n = undefined -- layout m n
282
283 getCoord ForceAtlas _ _ n = (sin d, cos d)
284 where
285 d = fromIntegral n
286
287 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
288 $ Map.lookup n
289 $ pcaReduceTo (Dimension 2)
290 $ mapArray labels m
291 where
292 to2d :: Vec.Vector Double -> (Double, Double)
293 to2d v = (x',y')
294 where
295 ds = take 2 $ Vec.toList v
296 x' = head' "to2d" ds
297 y' = last' "to2d" ds
298
299 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
300 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
301 where
302 ns = map snd items
303
304 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
305 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
306 ------------------------------------------------------------------------
307
308 -- | KamadaKawai Layout
309 -- TODO TEST: check labels, nodeId and coordinates
310 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
311 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
312 where
313 coord :: (Map Int (Double,Double))
314 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
315 --p = Layout.defaultLGL
316 p = Layout.kamadaKawai
317 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
318
319 -----------------------------------------------------------------------------
320 -- MISC Tools
321 cooc2graph'' :: Ord t => Similarity
322 -> Double
323 -> Map (t, t) Int
324 -> Map (Index, Index) Double
325 cooc2graph'' distance threshold myCooc = neighbourMap
326 where
327 (ti, _) = createIndices myCooc
328 myCooc' = toIndex ti myCooc
329 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
330 distanceMat = measure distance matCooc
331 neighbourMap = filterByNeighbours threshold
332 $ mat2map distanceMat
333
334 -- Quentin
335 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
336 filterByNeighbours threshold distanceMap = filteredMap
337 where
338 indexes :: [Index]
339 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
340 filteredMap :: Map (Index, Index) Double
341 filteredMap = Map.fromList
342 $ List.concat
343 $ map (\idx ->
344 let selected = List.reverse
345 $ List.sortOn snd
346 $ Map.toList
347 $ Map.filter (> 0)
348 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
349 in List.take (round threshold) selected
350 ) indexes
351