]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
Merge remote-tracking branch 'origin/513-dev-pin-tree' into dev-merge
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, {-recursiveClustering,-} recursiveClustering', setNodes2clusterNodes)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
58
59 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
60 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
61 instance FromJSON BridgenessMethod
62 instance ToJSON BridgenessMethod
63 instance ToSchema BridgenessMethod
64 instance Arbitrary BridgenessMethod where
65 arbitrary = elements [ minBound .. maxBound ]
66
67
68 -------------------------------------------------------------
69 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
70 -- defaultClustering x = pure $ BAC.defaultClustering x
71 defaultClustering x = spinglass 1 x
72
73 -------------------------------------------------------------
74 type Threshold = Double
75
76
77 cooc2graph' :: Ord t => Similarity
78 -> Double
79 -> Map (t, t) Int
80 -> Map (Index, Index) Double
81 cooc2graph' distance threshold myCooc
82 = Map.filter (> threshold)
83 $ mat2map
84 $ measure distance
85 $ case distance of
86 Conditional -> map2mat Triangle 0 tiSize
87 Distributional -> map2mat Square 0 tiSize
88 $ Map.filter (> 1) myCooc'
89
90 where
91 (ti, _) = createIndices myCooc
92 tiSize = Map.size ti
93 myCooc' = toIndex ti myCooc
94
95
96
97 -- coocurrences graph computation
98 cooc2graphWith :: PartitionMethod
99 -> BridgenessMethod
100 -> MultiPartite
101 -> Similarity
102 -> Threshold
103 -> Strength
104 -> HashMap (NgramsTerm, NgramsTerm) Int
105 -> IO Graph
106 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
107 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
108 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
109 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
110 -- TODO: change these options, or make them configurable in UI?
111
112 cooc2graphWith' :: Partitions
113 -> BridgenessMethod
114 -> MultiPartite
115 -> Similarity
116 -> Threshold
117 -> Strength
118 -> HashMap (NgramsTerm, NgramsTerm) Int
119 -> IO Graph
120 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold strength myCooc = do
121 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
122 distanceMap `seq` diag `seq` ti `seq` return ()
123
124 partitions <- if (Map.size distanceMap > 0)
125 then recursiveClustering' (spinglass' 1) distanceMap
126 else panic $ Text.unlines [ "I can not compute the graph you request"
127 , "because either the quantity of documents"
128 , "or the quantity of terms"
129 , "are lacking. "
130 , "Solution: add more either Documents or Map Terms to your analysis. "
131 , "Follow the available tutorials on the Training EcoSystems. "
132 , "Ask your co-users of GarganText how to have access to it."
133 ]
134 length partitions `seq` return ()
135
136 let
137 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
138 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap
139
140 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
141
142 {-
143 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Distributional threshold strength myCooc = do
144 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
145 distanceMap `seq` diag `seq` ti `seq` return ()
146
147 partitions <- if (Map.size distanceMap > 0)
148 then recursiveClustering (spinglass 1) distanceMap
149 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
150 , "Maybe you should add more Map Terms in your list"
151 , "Tutorial: TODO"
152 ]
153 length partitions `seq` return ()
154
155 let
156 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
157 !bridgeness' = bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
158
159 pure $ data2graph multi ti diag bridgeness' confluence' partitions
160 -}
161
162
163
164 type Reverse = Bool
165
166 doSimilarityMap :: Similarity
167 -> Threshold
168 -> Strength
169 -> HashMap (NgramsTerm, NgramsTerm) Int
170 -> ( Map (Int,Int) Double
171 , Map (Index, Index) Int
172 , Map NgramsTerm Index
173 )
174
175 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
176 where
177 myCooc' = Map.fromList $ HashMap.toList myCooc
178
179 (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
180 $ Map.fromList
181 $ HashMap.toList myCooc
182
183 (ti, _it) = createIndices theMatrix
184 tiSize = Map.size ti
185
186 similarities = (\m -> m `seq` m)
187 $ (\m -> m `seq` measure Conditional m)
188 $ (\m -> m `seq` map2mat Square 0 tiSize m)
189 $ theMatrix `seq` toIndex ti theMatrix
190
191 links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
192 distanceMap = Map.fromList
193 $ List.take links
194 $ (if strength == Weak then List.reverse else identity)
195 $ List.sortOn snd
196 $ Map.toList
197 $ Map.filter (> threshold)
198 $ similarities `seq` mat2map similarities
199
200 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
201 where
202 -- TODO remove below
203 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
204 $ Map.fromList
205 $ HashMap.toList myCooc
206
207 (ti, _it) = createIndices theMatrix
208 tiSize = Map.size ti
209
210 similarities = (\m -> m `seq` m)
211 $ (\m -> m `seq` measure Distributional m)
212 $ (\m -> m `seq` map2mat Square 0 tiSize m)
213 $ theMatrix `seq` toIndex ti theMatrix
214
215 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
216
217 distanceMap = Map.fromList
218 $ List.take links
219 $ (if strength == Weak then List.reverse else identity)
220 $ List.sortOn snd
221 $ Map.toList
222 $ edgesFilter
223 $ (\m -> m `seq` Map.filter (> threshold) m)
224 $ similarities `seq` mat2map similarities
225
226 ----------------------------------------------------------
227 -- | From data to Graph
228 type Occurrences = Int
229
230 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
231 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
232 if HashSet.member t s1
233 then t1
234 else t2
235
236 data2graph :: MultiPartite
237 -> Map NgramsTerm Int
238 -> Map (Int, Int) Occurrences
239 -> Map (Int, Int) Double
240 -> Map (Int, Int) Double
241 -> [ClusterNode]
242 -> Graph
243 data2graph multi labels' occurences bridge conf partitions =
244 Graph { _graph_nodes = nodes
245 , _graph_edges = edges
246 , _graph_metadata = Nothing
247 }
248
249 where
250
251 nodes = map (setCoord ForceAtlas labels bridge)
252 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
253 , node_type = nodeTypeWith multi label
254 , node_id = (cs . show) n
255 , node_label = unNgramsTerm label
256 , node_x_coord = 0
257 , node_y_coord = 0
258 , node_attributes =
259 Attributes { clust_default = fromMaybe 0
260 (Map.lookup n community_id_by_node_id)
261 }
262 , node_children = []
263 }
264 )
265 | (label, n) <- labels
266 , Set.member n toKeep
267 ]
268
269 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
270
271 edges = [ Edge { edge_source = cs (show s)
272 , edge_hidden = Nothing
273 , edge_target = cs (show t)
274 , edge_weight = weight
275 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
276 , edge_id = cs (show i)
277 }
278 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
279 , s /= t
280 , weight > 0
281 ]
282
283 community_id_by_node_id = Map.fromList
284 $ map nodeId2comId partitions
285
286 labels = Map.toList labels'
287
288
289 ------------------------------------------------------------------------
290
291 data Layout = KamadaKawai | ACP | ForceAtlas
292
293
294 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
295 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
296 where
297 (x,y) = f i
298
299
300 -- | ACP
301 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
302 setCoord l labels m (n,node) = node { node_x_coord = x
303 , node_y_coord = y
304 }
305 where
306 (x,y) = getCoord l labels m n
307
308
309 getCoord :: Ord a
310 => Layout
311 -> [(a, Int)]
312 -> Map (Int, Int) Double
313 -> Int
314 -> (Double, Double)
315 getCoord KamadaKawai _ _m _n = undefined -- layout m n
316
317 getCoord ForceAtlas _ _ n = (sin d, cos d)
318 where
319 d = fromIntegral n
320
321 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
322 $ Map.lookup n
323 $ pcaReduceTo (Dimension 2)
324 $ mapArray labels m
325 where
326 to2d :: Vec.Vector Double -> (Double, Double)
327 to2d v = (x',y')
328 where
329 ds = take 2 $ Vec.toList v
330 x' = head' "to2d" ds
331 y' = last' "to2d" ds
332
333 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
334 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
335 where
336 ns = map snd items
337
338 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
339 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
340 ------------------------------------------------------------------------
341
342 -- | KamadaKawai Layout
343 -- TODO TEST: check labels, nodeId and coordinates
344 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
345 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
346 where
347 coord :: (Map Int (Double,Double))
348 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
349 --p = Layout.defaultLGL
350 p = Layout.kamadaKawai
351 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
352
353 -----------------------------------------------------------------------------
354 -- MISC Tools
355 cooc2graph'' :: Ord t => Similarity
356 -> Double
357 -> Map (t, t) Int
358 -> Map (Index, Index) Double
359 cooc2graph'' distance threshold myCooc = neighbourMap
360 where
361 (ti, _) = createIndices myCooc
362 myCooc' = toIndex ti myCooc
363 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
364 distanceMat = measure distance matCooc
365 neighbourMap = filterByNeighbours threshold
366 $ mat2map distanceMat
367
368 -- Quentin
369 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
370 filterByNeighbours threshold distanceMap = filteredMap
371 where
372 indexes :: [Index]
373 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
374 filteredMap :: Map (Index, Index) Double
375 filteredMap = Map.fromList
376 $ List.concat
377 $ map (\idx ->
378 let selected = List.reverse
379 $ List.sortOn snd
380 $ Map.toList
381 $ Map.filter (> 0)
382 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
383 in List.take (round threshold) selected
384 ) indexes