]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[WIP] Bridgeness with Confluence, ok for order 1, order 2 needs more work
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness3, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51
52 data PartitionMethod = Spinglass | Confluence | Infomap
53 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
54 instance FromJSON PartitionMethod
55 instance ToJSON PartitionMethod
56 instance ToSchema PartitionMethod
57 instance Arbitrary PartitionMethod where
58 arbitrary = elements [ minBound .. maxBound ]
59
60
61 -------------------------------------------------------------
62 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
63 -- defaultClustering x = pure $ BAC.defaultClustering x
64 defaultClustering x = spinglass 1 x
65
66 -------------------------------------------------------------
67 type Threshold = Double
68
69
70 cooc2graph' :: Ord t => Similarity
71 -> Double
72 -> Map (t, t) Int
73 -> Map (Index, Index) Double
74 cooc2graph' distance threshold myCooc
75 = Map.filter (> threshold)
76 $ mat2map
77 $ measure distance
78 $ case distance of
79 Conditional -> map2mat Triangle 0 tiSize
80 Distributional -> map2mat Square 0 tiSize
81 $ Map.filter (> 1) myCooc'
82
83 where
84 (ti, _) = createIndices myCooc
85 tiSize = Map.size ti
86 myCooc' = toIndex ti myCooc
87
88
89
90 -- coocurrences graph computation
91 cooc2graphWith :: PartitionMethod
92 -> MultiPartite
93 -> Similarity
94 -> Threshold
95 -> Strength
96 -> HashMap (NgramsTerm, NgramsTerm) Int
97 -> IO Graph
98 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
99 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
100 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
101 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
102 -- TODO: change these options, or make them configurable in UI?
103
104
105 cooc2graphWith' :: ToComId a
106 => Partitions a
107 -> MultiPartite
108 -> Similarity
109 -> Threshold
110 -> Strength
111 -> HashMap (NgramsTerm, NgramsTerm) Int
112 -> IO Graph
113 cooc2graphWith' doPartitions multi similarity threshold strength myCooc = do
114 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
115 distanceMap `seq` diag `seq` ti `seq` return ()
116
117 --{- -- Debug
118 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
119 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
120 -- printDebug "similarities" similarities
121 --}
122
123 partitions <- if (Map.size distanceMap > 0)
124 then doPartitions distanceMap
125 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
126 , "Maybe you should add more Map Terms in your list"
127 , "Tutorial: link todo"
128 ]
129 length partitions `seq` return ()
130
131 let
132 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
133 !bridgeness' = bridgeness3 similarity confluence' distanceMap
134
135 pure $ data2graph multi ti diag bridgeness' confluence' partitions
136
137 type Reverse = Bool
138
139 doSimilarityMap :: Similarity
140 -> Threshold
141 -> Strength
142 -> HashMap (NgramsTerm, NgramsTerm) Int
143 -> ( Map (Int,Int) Double
144 , Map (Index, Index) Int
145 , Map NgramsTerm Index
146 )
147 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
148 where
149 -- TODO remove below
150 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
151 $ Map.fromList
152 $ HashMap.toList myCooc
153
154 (ti, _it) = createIndices theMatrix
155 tiSize = Map.size ti
156
157 similarities = (\m -> m `seq` m)
158 $ (\m -> m `seq` measure Distributional m)
159 $ (\m -> m `seq` map2mat Square 0 tiSize m)
160 $ theMatrix `seq` toIndex ti theMatrix
161
162 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
163
164 distanceMap = Map.fromList
165 $ List.take links
166 $ (if strength == Weak then List.reverse else identity)
167 $ List.sortOn snd
168 $ Map.toList
169 $ edgesFilter
170 $ (\m -> m `seq` Map.filter (> threshold) m)
171 $ similarities `seq` mat2map similarities
172
173 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
174 where
175 myCooc' = Map.fromList $ HashMap.toList myCooc
176 (ti, _it) = createIndices myCooc'
177 links = round (let n :: Double = fromIntegral (Map.size ti) in n * (log n)^(2::Int))
178 distanceMap = toIndex ti
179 $ Map.fromList
180 $ List.take links
181 $ (if strength == Weak then List.reverse else identity)
182 $ List.sortOn snd
183 $ HashMap.toList
184 $ HashMap.filter (> threshold)
185 $ conditional myCooc
186
187 ----------------------------------------------------------
188 -- | From data to Graph
189 type Occurrences = Int
190
191 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
192 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
193 if HashSet.member t s1
194 then t1
195 else t2
196
197
198 data2graph :: ToComId a
199 => MultiPartite
200 -> Map NgramsTerm Int
201 -> Map (Int, Int) Occurrences
202 -> Map (Int, Int) Double
203 -> Map (Int, Int) Double
204 -> [a]
205 -> Graph
206 data2graph multi labels' occurences bridge conf partitions =
207 Graph { _graph_nodes = nodes
208 , _graph_edges = edges
209 , _graph_metadata = Nothing
210 }
211
212 where
213
214 nodes = map (setCoord ForceAtlas labels bridge)
215 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
216 , node_type = nodeTypeWith multi label
217 , node_id = (cs . show) n
218 , node_label = unNgramsTerm label
219 , node_x_coord = 0
220 , node_y_coord = 0
221 , node_attributes =
222 Attributes { clust_default = fromMaybe 0
223 (Map.lookup n community_id_by_node_id)
224 }
225 , node_children = []
226 }
227 )
228 | (label, n) <- labels
229 , Set.member n toKeep
230 ]
231
232 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
233
234 edges = [ Edge { edge_source = cs (show s)
235 , edge_target = cs (show t)
236 , edge_weight = weight
237 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
238 , edge_id = cs (show i)
239 }
240 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
241 , s /= t
242 , weight > 0
243 ]
244
245 community_id_by_node_id = Map.fromList
246 $ map nodeId2comId partitions
247
248 labels = Map.toList labels'
249
250
251 ------------------------------------------------------------------------
252
253 data Layout = KamadaKawai | ACP | ForceAtlas
254
255
256 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
257 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
258 where
259 (x,y) = f i
260
261
262 -- | ACP
263 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
264 setCoord l labels m (n,node) = node { node_x_coord = x
265 , node_y_coord = y
266 }
267 where
268 (x,y) = getCoord l labels m n
269
270
271 getCoord :: Ord a
272 => Layout
273 -> [(a, Int)]
274 -> Map (Int, Int) Double
275 -> Int
276 -> (Double, Double)
277 getCoord KamadaKawai _ _m _n = undefined -- layout m n
278
279 getCoord ForceAtlas _ _ n = (sin d, cos d)
280 where
281 d = fromIntegral n
282
283 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
284 $ Map.lookup n
285 $ pcaReduceTo (Dimension 2)
286 $ mapArray labels m
287 where
288 to2d :: Vec.Vector Double -> (Double, Double)
289 to2d v = (x',y')
290 where
291 ds = take 2 $ Vec.toList v
292 x' = head' "to2d" ds
293 y' = last' "to2d" ds
294
295 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
296 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
297 where
298 ns = map snd items
299
300 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
301 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
302 ------------------------------------------------------------------------
303
304 -- | KamadaKawai Layout
305 -- TODO TEST: check labels, nodeId and coordinates
306 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
307 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
308 where
309 coord :: (Map Int (Double,Double))
310 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
311 --p = Layout.defaultLGL
312 p = Layout.kamadaKawai
313 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
314
315 -----------------------------------------------------------------------------
316 -- MISC Tools
317 cooc2graph'' :: Ord t => Similarity
318 -> Double
319 -> Map (t, t) Int
320 -> Map (Index, Index) Double
321 cooc2graph'' distance threshold myCooc = neighbourMap
322 where
323 (ti, _) = createIndices myCooc
324 myCooc' = toIndex ti myCooc
325 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
326 distanceMat = measure distance matCooc
327 neighbourMap = filterByNeighbours threshold
328 $ mat2map distanceMat
329
330 -- Quentin
331 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
332 filterByNeighbours threshold distanceMap = filteredMap
333 where
334 indexes :: [Index]
335 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
336 filteredMap :: Map (Index, Index) Double
337 filteredMap = Map.fromList
338 $ List.concat
339 $ map (\idx ->
340 let selected = List.reverse
341 $ List.sortOn snd
342 $ Map.toList
343 $ Map.filter (> 0)
344 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
345 in List.take (round threshold) selected
346 ) indexes
347