]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[VERSION] +1 to 0.0.6.9.8.2
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId{-, recursiveClustering-}, recursiveClustering', setNodes2clusterNodes)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
58
59 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
60 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
61 instance FromJSON BridgenessMethod
62 instance ToJSON BridgenessMethod
63 instance ToSchema BridgenessMethod
64 instance Arbitrary BridgenessMethod where
65 arbitrary = elements [ minBound .. maxBound ]
66
67
68 -------------------------------------------------------------
69 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
70 -- defaultClustering x = pure $ BAC.defaultClustering x
71 defaultClustering x = spinglass 1 x
72
73 -------------------------------------------------------------
74 type Threshold = Double
75
76
77 cooc2graph' :: Ord t => Similarity
78 -> Double
79 -> Map (t, t) Int
80 -> Map (Index, Index) Double
81 cooc2graph' distance threshold myCooc
82 = Map.filter (> threshold)
83 $ mat2map
84 $ measure distance
85 $ case distance of
86 Conditional -> map2mat Triangle 0 tiSize
87 Distributional -> map2mat Square 0 tiSize
88 $ Map.filter (> 1) myCooc'
89
90 where
91 (ti, _) = createIndices myCooc
92 tiSize = Map.size ti
93 myCooc' = toIndex ti myCooc
94
95
96
97 -- coocurrences graph computation
98 cooc2graphWith :: PartitionMethod
99 -> BridgenessMethod
100 -> MultiPartite
101 -> Similarity
102 -> Threshold
103 -> Strength
104 -> HashMap (NgramsTerm, NgramsTerm) Int
105 -> IO Graph
106 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
107 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
108 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
109 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
110 -- TODO: change these options, or make them configurable in UI?
111
112 cooc2graphWith' :: Partitions
113 -> BridgenessMethod
114 -> MultiPartite
115 -> Similarity
116 -> Threshold
117 -> Strength
118 -> HashMap (NgramsTerm, NgramsTerm) Int
119 -> IO Graph
120 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold strength myCooc = do
121 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
122 distanceMap `seq` diag `seq` ti `seq` return ()
123
124 partitions <- if (Map.size distanceMap > 0)
125 then recursiveClustering' (spinglass' 1) distanceMap
126 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
127 , "Maybe you should add more Map Terms in your list"
128 , "Tutorial: TODO"
129 ]
130 length partitions `seq` return ()
131
132 let
133 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
134 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap
135
136 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
137
138
139
140 type Reverse = Bool
141
142 doSimilarityMap :: Similarity
143 -> Threshold
144 -> Strength
145 -> HashMap (NgramsTerm, NgramsTerm) Int
146 -> ( Map (Int,Int) Double
147 , Map (Index, Index) Int
148 , Map NgramsTerm Index
149 )
150
151 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
152 where
153 myCooc' = Map.fromList $ HashMap.toList myCooc
154
155 (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
156 $ Map.fromList
157 $ HashMap.toList myCooc
158
159 (ti, _it) = createIndices theMatrix
160 tiSize = Map.size ti
161
162 similarities = (\m -> m `seq` m)
163 $ (\m -> m `seq` measure Conditional m)
164 $ (\m -> m `seq` map2mat Square 0 tiSize m)
165 $ theMatrix `seq` toIndex ti theMatrix
166
167 links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
168 distanceMap = Map.fromList
169 $ List.take links
170 $ (if strength == Weak then List.reverse else identity)
171 $ List.sortOn snd
172 $ Map.toList
173 $ Map.filter (> threshold)
174 -- $ conditional myCooc
175 $ similarities `seq` mat2map similarities
176
177 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
178 where
179 -- TODO remove below
180 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
181 $ Map.fromList
182 $ HashMap.toList myCooc
183
184 (ti, _it) = createIndices theMatrix
185 tiSize = Map.size ti
186
187 similarities = (\m -> m `seq` m)
188 $ (\m -> m `seq` measure Distributional m)
189 $ (\m -> m `seq` map2mat Square 0 tiSize m)
190 $ theMatrix `seq` toIndex ti theMatrix
191
192 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
193
194 distanceMap = Map.fromList
195 $ List.take links
196 $ (if strength == Weak then List.reverse else identity)
197 $ List.sortOn snd
198 $ Map.toList
199 $ edgesFilter
200 $ (\m -> m `seq` Map.filter (> threshold) m)
201 $ similarities `seq` mat2map similarities
202
203 ----------------------------------------------------------
204 -- | From data to Graph
205 type Occurrences = Int
206
207 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
208 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
209 if HashSet.member t s1
210 then t1
211 else t2
212
213 data2graph :: MultiPartite
214 -> Map NgramsTerm Int
215 -> Map (Int, Int) Occurrences
216 -> Map (Int, Int) Double
217 -> Map (Int, Int) Double
218 -> [ClusterNode]
219 -> Graph
220 data2graph multi labels' occurences bridge conf partitions =
221 Graph { _graph_nodes = nodes
222 , _graph_edges = edges
223 , _graph_metadata = Nothing
224 }
225
226 where
227
228 nodes = map (setCoord ForceAtlas labels bridge)
229 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
230 , node_type = nodeTypeWith multi label
231 , node_id = (cs . show) n
232 , node_label = unNgramsTerm label
233 , node_x_coord = 0
234 , node_y_coord = 0
235 , node_attributes =
236 Attributes { clust_default = fromMaybe 0
237 (Map.lookup n community_id_by_node_id)
238 }
239 , node_children = []
240 }
241 )
242 | (label, n) <- labels
243 , Set.member n toKeep
244 ]
245
246 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
247
248 edges = [ Edge { edge_source = cs (show s)
249 , edge_hidden = Nothing
250 , edge_target = cs (show t)
251 , edge_weight = weight
252 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
253 , edge_id = cs (show i)
254 }
255 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
256 , s /= t
257 , weight > 0
258 ]
259
260 community_id_by_node_id = Map.fromList
261 $ map nodeId2comId partitions
262
263 labels = Map.toList labels'
264
265
266 ------------------------------------------------------------------------
267
268 data Layout = KamadaKawai | ACP | ForceAtlas
269
270
271 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
272 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
273 where
274 (x,y) = f i
275
276
277 -- | ACP
278 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
279 setCoord l labels m (n,node) = node { node_x_coord = x
280 , node_y_coord = y
281 }
282 where
283 (x,y) = getCoord l labels m n
284
285
286 getCoord :: Ord a
287 => Layout
288 -> [(a, Int)]
289 -> Map (Int, Int) Double
290 -> Int
291 -> (Double, Double)
292 getCoord KamadaKawai _ _m _n = undefined -- layout m n
293
294 getCoord ForceAtlas _ _ n = (sin d, cos d)
295 where
296 d = fromIntegral n
297
298 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
299 $ Map.lookup n
300 $ pcaReduceTo (Dimension 2)
301 $ mapArray labels m
302 where
303 to2d :: Vec.Vector Double -> (Double, Double)
304 to2d v = (x',y')
305 where
306 ds = take 2 $ Vec.toList v
307 x' = head' "to2d" ds
308 y' = last' "to2d" ds
309
310 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
311 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
312 where
313 ns = map snd items
314
315 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
316 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
317 ------------------------------------------------------------------------
318
319 -- | KamadaKawai Layout
320 -- TODO TEST: check labels, nodeId and coordinates
321 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
322 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
323 where
324 coord :: (Map Int (Double,Double))
325 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
326 --p = Layout.defaultLGL
327 p = Layout.kamadaKawai
328 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
329
330 -----------------------------------------------------------------------------
331 -- MISC Tools
332 cooc2graph'' :: Ord t => Similarity
333 -> Double
334 -> Map (t, t) Int
335 -> Map (Index, Index) Double
336 cooc2graph'' distance threshold myCooc = neighbourMap
337 where
338 (ti, _) = createIndices myCooc
339 myCooc' = toIndex ti myCooc
340 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
341 distanceMat = measure distance matCooc
342 neighbourMap = filterByNeighbours threshold
343 $ mat2map distanceMat
344
345 -- Quentin
346 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
347 filterByNeighbours threshold distanceMap = filteredMap
348 where
349 indexes :: [Index]
350 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
351 filteredMap :: Map (Index, Index) Double
352 filteredMap = Map.fromList
353 $ List.concat
354 $ map (\idx ->
355 let selected = List.reverse
356 $ List.sortOn snd
357 $ Map.toList
358 $ Map.filter (> 0)
359 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
360 in List.take (round threshold) selected
361 ) indexes