]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[MERGE] Phylo
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, {-recursiveClustering,-} recursiveClustering', setNodes2clusterNodes)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
58
59 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
60 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
61 instance FromJSON BridgenessMethod
62 instance ToJSON BridgenessMethod
63 instance ToSchema BridgenessMethod
64 instance Arbitrary BridgenessMethod where
65 arbitrary = elements [ minBound .. maxBound ]
66
67
68 -------------------------------------------------------------
69 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
70 -- defaultClustering x = pure $ BAC.defaultClustering x
71 defaultClustering x = spinglass 1 x
72
73 -------------------------------------------------------------
74 type Threshold = Double
75
76 cooc2graph' :: Ord t => Similarity
77 -> Double
78 -> Map (t, t) Int
79 -> Map (Index, Index) Double
80 cooc2graph' distance threshold myCooc
81 = Map.filter (> threshold)
82 $ mat2map
83 $ measure distance
84 $ case distance of
85 Conditional -> map2mat Triangle 0 tiSize
86 _ -> map2mat Square 0 tiSize
87 $ Map.filter (> 1) myCooc'
88
89 where
90 (ti, _) = createIndices myCooc
91 tiSize = Map.size ti
92 myCooc' = toIndex ti myCooc
93
94
95 -- coocurrences graph computation
96 cooc2graphWith :: PartitionMethod
97 -> BridgenessMethod
98 -> MultiPartite
99 -> Similarity
100 -> Threshold
101 -> Strength
102 -> HashMap (NgramsTerm, NgramsTerm) Int
103 -> IO Graph
104 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
105 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
106 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
107 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
108 -- TODO: change these options, or make them configurable in UI?
109
110 cooc2graphWith' :: Partitions
111 -> BridgenessMethod
112 -> MultiPartite
113 -> Similarity
114 -> Threshold
115 -> Strength
116 -> HashMap (NgramsTerm, NgramsTerm) Int
117 -> IO Graph
118 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold strength myCooc = do
119 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
120 distanceMap `seq` diag `seq` ti `seq` return ()
121
122 partitions <- if (Map.size distanceMap > 0)
123 then recursiveClustering' (spinglass' 1) distanceMap
124 else panic $ Text.intercalate " " [ "I can not compute the graph you request"
125 , "because either the quantity of documents"
126 , "or the quantity of terms"
127 , "are lacking."
128 , "Solution: add more either Documents or Map Terms to your analysis."
129 , "Follow the available tutorials on the Training EcoSystems."
130 , "Ask your co-users of GarganText how to have access to it."
131 ]
132 length partitions `seq` return ()
133
134 let
135 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
136 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap
137
138 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
139
140 {-
141 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Distributional threshold strength myCooc = do
142 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
143 distanceMap `seq` diag `seq` ti `seq` return ()
144
145 partitions <- if (Map.size distanceMap > 0)
146 then recursiveClustering (spinglass 1) distanceMap
147 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
148 , "Maybe you should add more Map Terms in your list"
149 , "Tutorial: TODO"
150 ]
151 length partitions `seq` return ()
152
153 let
154 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
155 !bridgeness' = bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
156
157 pure $ data2graph multi ti diag bridgeness' confluence' partitions
158 -}
159
160
161
162 type Reverse = Bool
163
164 doSimilarityMap :: Similarity
165 -> Threshold
166 -> Strength
167 -> HashMap (NgramsTerm, NgramsTerm) Int
168 -> ( Map (Int,Int) Double
169 , Map (Index, Index) Int
170 , Map NgramsTerm Index
171 )
172
173 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
174 where
175 myCooc' = Map.fromList $ HashMap.toList myCooc
176
177 (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
178 $ Map.fromList
179 $ HashMap.toList myCooc
180
181 (ti, _it) = createIndices theMatrix
182 tiSize = Map.size ti
183
184 similarities = (\m -> m `seq` m)
185 $ (\m -> m `seq` measure Conditional m)
186 $ (\m -> m `seq` map2mat Square 0 tiSize m)
187 $ theMatrix `seq` toIndex ti theMatrix
188
189 links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
190 distanceMap = Map.fromList
191 $ List.take links
192 $ (if strength == Weak then List.reverse else identity)
193 $ List.sortOn snd
194 $ Map.toList
195 $ Map.filter (> threshold)
196 $ similarities `seq` mat2map similarities
197
198 doSimilarityMap distriType threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
199 where
200 -- TODO remove below
201 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
202 $ Map.fromList
203 $ HashMap.toList myCooc
204
205 (ti, _it) = createIndices theMatrix
206 tiSize = Map.size ti
207
208 similarities = (\m -> m `seq` m)
209 $ (\m -> m `seq` measure distriType m)
210 $ (\m -> m `seq` map2mat Square 0 tiSize m)
211 $ theMatrix `seq` toIndex ti theMatrix
212
213 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
214
215 distanceMap = Map.fromList
216 $ List.take links
217 $ (if strength == Weak then List.reverse else identity)
218 $ List.sortOn snd
219 $ Map.toList
220 $ edgesFilter
221 $ (\m -> m `seq` Map.filter (> threshold) m)
222 $ similarities `seq` mat2map similarities
223
224 ----------------------------------------------------------
225 -- | From data to Graph
226 type Occurrences = Int
227
228 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
229 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
230 if HashSet.member t s1
231 then t1
232 else t2
233
234 data2graph :: MultiPartite
235 -> Map NgramsTerm Int
236 -> Map (Int, Int) Occurrences
237 -> Map (Int, Int) Double
238 -> Map (Int, Int) Double
239 -> [ClusterNode]
240 -> Graph
241 data2graph multi labels' occurences bridge conf partitions =
242 Graph { _graph_nodes = nodes
243 , _graph_edges = edges
244 , _graph_metadata = Nothing
245 }
246
247 where
248
249 nodes = map (setCoord ForceAtlas labels bridge)
250 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
251 , node_type = nodeTypeWith multi label
252 , node_id = (cs . show) n
253 , node_label = unNgramsTerm label
254 , node_x_coord = 0
255 , node_y_coord = 0
256 , node_attributes =
257 Attributes { clust_default = fromMaybe 0
258 (Map.lookup n community_id_by_node_id)
259 }
260 , node_children = []
261 }
262 )
263 | (label, n) <- labels
264 , Set.member n toKeep
265 ]
266
267 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
268
269 edges = [ Edge { edge_source = cs (show s)
270 , edge_hidden = Nothing
271 , edge_target = cs (show t)
272 , edge_weight = weight
273 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
274 , edge_id = cs (show i)
275 }
276 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
277 , s /= t
278 , weight > 0
279 ]
280
281 community_id_by_node_id = Map.fromList
282 $ map nodeId2comId partitions
283
284 labels = Map.toList labels'
285
286
287 ------------------------------------------------------------------------
288
289 data Layout = KamadaKawai | ACP | ForceAtlas
290
291
292 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
293 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
294 where
295 (x,y) = f i
296
297
298 -- | ACP
299 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
300 setCoord l labels m (n,node) = node { node_x_coord = x
301 , node_y_coord = y
302 }
303 where
304 (x,y) = getCoord l labels m n
305
306
307 getCoord :: Ord a
308 => Layout
309 -> [(a, Int)]
310 -> Map (Int, Int) Double
311 -> Int
312 -> (Double, Double)
313 getCoord KamadaKawai _ _m _n = undefined -- layout m n
314
315 getCoord ForceAtlas _ _ n = (sin d, cos d)
316 where
317 d = fromIntegral n
318
319 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
320 $ Map.lookup n
321 $ pcaReduceTo (Dimension 2)
322 $ mapArray labels m
323 where
324 to2d :: Vec.Vector Double -> (Double, Double)
325 to2d v = (x',y')
326 where
327 ds = take 2 $ Vec.toList v
328 x' = head' "to2d" ds
329 y' = last' "to2d" ds
330
331 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
332 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
333 where
334 ns = map snd items
335
336 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
337 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
338 ------------------------------------------------------------------------
339
340 -- | KamadaKawai Layout
341 -- TODO TEST: check labels, nodeId and coordinates
342 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
343 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
344 where
345 coord :: (Map Int (Double,Double))
346 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
347 --p = Layout.defaultLGL
348 p = Layout.kamadaKawai
349 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
350
351 -----------------------------------------------------------------------------
352 -- MISC Tools
353 cooc2graph'' :: Ord t => Similarity
354 -> Double
355 -> Map (t, t) Int
356 -> Map (Index, Index) Double
357 cooc2graph'' distance threshold myCooc = neighbourMap
358 where
359 (ti, _) = createIndices myCooc
360 myCooc' = toIndex ti myCooc
361 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
362 distanceMat = measure distance matCooc
363 neighbourMap = filterByNeighbours threshold
364 $ mat2map distanceMat
365
366 -- Quentin
367 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
368 filterByNeighbours threshold distanceMap = filteredMap
369 where
370 indexes :: [Index]
371 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
372 filteredMap :: Map (Index, Index) Double
373 filteredMap = Map.fromList
374 $ List.concat
375 $ map (\idx ->
376 let selected = List.reverse
377 $ List.sortOn snd
378 $ Map.toList
379 $ Map.filter (> 0)
380 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
381 in List.take (round threshold) selected
382 ) indexes