]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[CLEAN] Code
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51
52 data PartitionMethod = Spinglass | Confluence | Infomap
53 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
54 instance FromJSON PartitionMethod
55 instance ToJSON PartitionMethod
56 instance ToSchema PartitionMethod
57 instance Arbitrary PartitionMethod where
58 arbitrary = elements [ minBound .. maxBound ]
59
60
61 -------------------------------------------------------------
62 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
63 -- defaultClustering x = pure $ BAC.defaultClustering x
64 defaultClustering x = spinglass 1 x
65
66 -------------------------------------------------------------
67 type Threshold = Double
68
69
70 cooc2graph' :: Ord t => Similarity
71 -> Double
72 -> Map (t, t) Int
73 -> Map (Index, Index) Double
74 cooc2graph' distance threshold myCooc
75 = Map.filter (> threshold)
76 $ mat2map
77 $ measure distance
78 $ case distance of
79 Conditional -> map2mat Triangle 0 tiSize
80 Distributional -> map2mat Square 0 tiSize
81 $ Map.filter (> 1) myCooc'
82
83 where
84 (ti, _) = createIndices myCooc
85 tiSize = Map.size ti
86 myCooc' = toIndex ti myCooc
87
88
89
90 -- coocurrences graph computation
91 cooc2graphWith :: PartitionMethod
92 -> MultiPartite
93 -> Similarity
94 -> Threshold
95 -> Strength
96 -> HashMap (NgramsTerm, NgramsTerm) Int
97 -> IO Graph
98 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
99 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
100 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
101 -- TODO: change these options, or make them configurable in UI?
102
103
104 cooc2graphWith' :: ToComId a
105 => Partitions a
106 -> MultiPartite
107 -> Similarity
108 -> Threshold
109 -> Strength
110 -> HashMap (NgramsTerm, NgramsTerm) Int
111 -> IO Graph
112 cooc2graphWith' doPartitions multi similarity threshold strength myCooc = do
113 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
114 distanceMap `seq` diag `seq` ti `seq` return ()
115
116 --{- -- Debug
117 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
118 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
119 -- printDebug "similarities" similarities
120 --}
121
122 partitions <- if (Map.size distanceMap > 0)
123 then doPartitions distanceMap
124 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
125 , "Maybe you should add more Map Terms in your list"
126 , "Tutorial: link todo"
127 ]
128 length partitions `seq` return ()
129 let
130 nodesApprox :: Int
131 nodesApprox = n'
132 where
133 (as, bs) = List.unzip $ Map.keys distanceMap
134 n' = Set.size $ Set.fromList $ as <> bs
135 !bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
136 !confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
137 pure $ data2graph multi ti diag bridgeness' confluence' partitions
138
139 type Reverse = Bool
140
141 doSimilarityMap :: Similarity
142 -> Threshold
143 -> Strength
144 -> HashMap (NgramsTerm, NgramsTerm) Int
145 -> ( Map (Int,Int) Double
146 , Map (Index, Index) Int
147 , Map NgramsTerm Index
148 )
149 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
150 where
151 -- TODO remove below
152 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
153 $ Map.fromList
154 $ HashMap.toList myCooc
155
156 (ti, _it) = createIndices theMatrix
157 tiSize = Map.size ti
158
159 similarities = (\m -> m `seq` m)
160 $ (\m -> m `seq` measure Distributional m)
161 $ (\m -> m `seq` map2mat Square 0 tiSize m)
162 $ theMatrix `seq` toIndex ti theMatrix
163
164 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
165
166 distanceMap = Map.fromList
167 $ List.take links
168 $ (if strength == Weak then List.reverse else identity)
169 $ List.sortOn snd
170 $ Map.toList
171 $ edgesFilter
172 $ (\m -> m `seq` Map.filter (> threshold) m)
173 $ similarities `seq` mat2map similarities
174
175 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
176 where
177 myCooc' = Map.fromList $ HashMap.toList myCooc
178 (ti, _it) = createIndices myCooc'
179 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
180 distanceMap = toIndex ti
181 $ Map.fromList
182 $ List.take links
183 $ (if strength == Weak then List.reverse else identity)
184 $ List.sortOn snd
185 $ HashMap.toList
186 $ HashMap.filter (> threshold)
187 $ conditional myCooc
188
189 ----------------------------------------------------------
190 -- | From data to Graph
191 type Occurrences = Int
192
193 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
194 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
195 if HashSet.member t s1
196 then t1
197 else t2
198
199
200 data2graph :: ToComId a
201 => MultiPartite
202 -> Map NgramsTerm Int
203 -> Map (Int, Int) Occurrences
204 -> Map (Int, Int) Double
205 -> Map (Int, Int) Double
206 -> [a]
207 -> Graph
208 data2graph multi labels' occurences bridge conf partitions =
209 Graph { _graph_nodes = nodes
210 , _graph_edges = edges
211 , _graph_metadata = Nothing
212 }
213
214 where
215
216 nodes = map (setCoord ForceAtlas labels bridge)
217 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
218 , node_type = nodeTypeWith multi label
219 , node_id = (cs . show) n
220 , node_label = unNgramsTerm label
221 , node_x_coord = 0
222 , node_y_coord = 0
223 , node_attributes =
224 Attributes { clust_default = fromMaybe 0
225 (Map.lookup n community_id_by_node_id)
226 }
227 , node_children = []
228 }
229 )
230 | (label, n) <- labels
231 , Set.member n toKeep
232 ]
233
234 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
235
236 edges = [ Edge { edge_source = cs (show s)
237 , edge_target = cs (show t)
238 , edge_weight = weight
239 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
240 , edge_id = cs (show i)
241 }
242 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
243 , s /= t
244 , weight > 0
245 ]
246
247 community_id_by_node_id = Map.fromList
248 $ map nodeId2comId partitions
249
250 labels = Map.toList labels'
251
252
253 ------------------------------------------------------------------------
254
255 data Layout = KamadaKawai | ACP | ForceAtlas
256
257
258 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
259 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
260 where
261 (x,y) = f i
262
263
264 -- | ACP
265 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
266 setCoord l labels m (n,node) = node { node_x_coord = x
267 , node_y_coord = y
268 }
269 where
270 (x,y) = getCoord l labels m n
271
272
273 getCoord :: Ord a
274 => Layout
275 -> [(a, Int)]
276 -> Map (Int, Int) Double
277 -> Int
278 -> (Double, Double)
279 getCoord KamadaKawai _ _m _n = undefined -- layout m n
280
281 getCoord ForceAtlas _ _ n = (sin d, cos d)
282 where
283 d = fromIntegral n
284
285 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
286 $ Map.lookup n
287 $ pcaReduceTo (Dimension 2)
288 $ mapArray labels m
289 where
290 to2d :: Vec.Vector Double -> (Double, Double)
291 to2d v = (x',y')
292 where
293 ds = take 2 $ Vec.toList v
294 x' = head' "to2d" ds
295 y' = last' "to2d" ds
296
297 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
298 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
299 where
300 ns = map snd items
301
302 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
303 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
304 ------------------------------------------------------------------------
305
306 -- | KamadaKawai Layout
307 -- TODO TEST: check labels, nodeId and coordinates
308 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
309 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
310 where
311 coord :: (Map Int (Double,Double))
312 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
313 --p = Layout.defaultLGL
314 p = Layout.kamadaKawai
315 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
316
317 -----------------------------------------------------------------------------
318 -- MISC Tools
319 cooc2graph'' :: Ord t => Similarity
320 -> Double
321 -> Map (t, t) Int
322 -> Map (Index, Index) Double
323 cooc2graph'' distance threshold myCooc = neighbourMap
324 where
325 (ti, _) = createIndices myCooc
326 myCooc' = toIndex ti myCooc
327 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
328 distanceMat = measure distance matCooc
329 neighbourMap = filterByNeighbours threshold
330 $ mat2map distanceMat
331
332 -- Quentin
333 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
334 filterByNeighbours threshold distanceMap = filteredMap
335 where
336 indexes :: [Index]
337 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
338 filteredMap :: Map (Index, Index) Double
339 filteredMap = Map.fromList
340 $ List.concat
341 $ map (\idx ->
342 let selected = List.reverse
343 $ List.sortOn snd
344 $ Map.toList
345 $ Map.filter (> 0)
346 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
347 in List.take (round threshold) selected
348 ) indexes
349