]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[WIP] First specification for #145 issue
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Text as Text
44 import qualified Data.Vector.Storable as Vec
45 import qualified Graph.BAC.ProxemyOptim as BAC
46 import qualified IGraph as Igraph
47 import qualified IGraph.Algorithms.Layout as Layout
48
49
50 data PartitionMethod = Spinglass | Confluence | Infomap
51 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
52 instance FromJSON PartitionMethod
53 instance ToJSON PartitionMethod
54 instance ToSchema PartitionMethod
55 instance Arbitrary PartitionMethod where
56 arbitrary = elements [ minBound .. maxBound ]
57
58
59 -------------------------------------------------------------
60 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
61 -- defaultClustering x = pure $ BAC.defaultClustering x
62 defaultClustering x = spinglass 1 x
63
64 -------------------------------------------------------------
65 type Threshold = Double
66
67
68 cooc2graph' :: Ord t => Distance
69 -> Double
70 -> Map (t, t) Int
71 -> Map (Index, Index) Double
72 cooc2graph' distance threshold myCooc
73 = Map.filter (> threshold)
74 $ mat2map
75 $ measure distance
76 $ case distance of
77 Conditional -> map2mat Triangle 0 tiSize
78 Distributional -> map2mat Square 0 tiSize
79 $ Map.filter (> 1) myCooc'
80
81 where
82 (ti, _) = createIndices myCooc
83 tiSize = Map.size ti
84 myCooc' = toIndex ti myCooc
85
86
87
88 -- coocurrences graph computation
89 cooc2graphWith :: PartitionMethod
90 -> Distance
91 -> Threshold
92 -> Strength
93 -> HashMap (NgramsTerm, NgramsTerm) Int
94 -> IO Graph
95 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
96 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
97 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
98 -- TODO: change these options, or make them configurable in UI?
99
100
101 cooc2graphWith' :: ToComId a
102 => Partitions a
103 -> Distance
104 -> Threshold
105 -> Strength
106 -> HashMap (NgramsTerm, NgramsTerm) Int
107 -> IO Graph
108 cooc2graphWith' doPartitions distance threshold strength myCooc = do
109 let (distanceMap, diag, ti) = doDistanceMap distance threshold strength myCooc
110 distanceMap `seq` diag `seq` ti `seq` return ()
111
112 --{- -- Debug
113 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
114 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
115 -- printDebug "similarities" similarities
116 --}
117
118 partitions <- if (Map.size distanceMap > 0)
119 then doPartitions distanceMap
120 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
121 , "Maybe you should add more Map Terms in your list"
122 , "Tutorial: link todo"
123 ]
124 length partitions `seq` return ()
125 let
126 nodesApprox :: Int
127 nodesApprox = n'
128 where
129 (as, bs) = List.unzip $ Map.keys distanceMap
130 n' = Set.size $ Set.fromList $ as <> bs
131 !bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
132 !confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
133 pure $ data2graph ti diag bridgeness' confluence' partitions
134
135 type Reverse = Bool
136
137 doDistanceMap :: Distance
138 -> Threshold
139 -> Strength
140 -> HashMap (NgramsTerm, NgramsTerm) Int
141 -> ( Map (Int,Int) Double
142 , Map (Index, Index) Int
143 , Map NgramsTerm Index
144 )
145 doDistanceMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
146 where
147 -- TODO remove below
148 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
149 $ Map.fromList
150 $ HashMap.toList myCooc
151
152 (ti, _it) = createIndices theMatrix
153 tiSize = Map.size ti
154
155 similarities = (\m -> m `seq` m)
156 $ (\m -> m `seq` measure Distributional m)
157 $ (\m -> m `seq` map2mat Square 0 tiSize m)
158 $ theMatrix `seq` toIndex ti theMatrix
159
160 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
161
162 distanceMap = Map.fromList
163 $ List.take links
164 $ (if strength == Weak then List.reverse else identity)
165 $ List.sortOn snd
166 $ Map.toList
167 $ edgesFilter
168 $ (\m -> m `seq` Map.filter (> threshold) m)
169 $ similarities `seq` mat2map similarities
170
171 doDistanceMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
172 where
173 myCooc' = Map.fromList $ HashMap.toList myCooc
174 (ti, _it) = createIndices myCooc'
175 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
176 distanceMap = toIndex ti
177 $ Map.fromList
178 $ List.take links
179 $ (if strength == Weak then List.reverse else identity)
180 $ List.sortOn snd
181 $ HashMap.toList
182 $ HashMap.filter (> threshold)
183 $ conditional myCooc
184
185 ----------------------------------------------------------
186 -- | From data to Graph
187
188 type Occurrences = Int
189
190 data2graph :: ToComId a
191 => Map NgramsTerm Int
192 -> Map (Int, Int) Occurrences
193 -> Map (Int, Int) Double
194 -> Map (Int, Int) Double
195 -> [a]
196 -> Graph
197 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
198 , _graph_edges = edges
199 , _graph_metadata = Nothing
200 }
201 where
202
203 nodes = map (setCoord ForceAtlas labels bridge)
204 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
205 , node_type = Terms -- or Unknown
206 , node_id = cs (show n)
207 , node_label = unNgramsTerm l
208 , node_x_coord = 0
209 , node_y_coord = 0
210 , node_attributes = Attributes { clust_default = fromMaybe 0
211 (Map.lookup n community_id_by_node_id)
212 }
213 , node_children = []
214 }
215 )
216 | (l, n) <- labels
217 , Set.member n toKeep
218 ]
219
220 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
221
222 edges = [ Edge { edge_source = cs (show s)
223 , edge_target = cs (show t)
224 , edge_weight = weight
225 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
226 , edge_id = cs (show i)
227 }
228 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
229 , s /= t
230 , weight > 0
231 ]
232
233 community_id_by_node_id = Map.fromList
234 $ map nodeId2comId partitions
235
236 labels = Map.toList labels'
237
238
239 ------------------------------------------------------------------------
240
241 data Layout = KamadaKawai | ACP | ForceAtlas
242
243
244 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
245 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
246 where
247 (x,y) = f i
248
249
250 -- | ACP
251 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
252 setCoord l labels m (n,node) = node { node_x_coord = x
253 , node_y_coord = y
254 }
255 where
256 (x,y) = getCoord l labels m n
257
258
259 getCoord :: Ord a
260 => Layout
261 -> [(a, Int)]
262 -> Map (Int, Int) Double
263 -> Int
264 -> (Double, Double)
265 getCoord KamadaKawai _ _m _n = undefined -- layout m n
266
267 getCoord ForceAtlas _ _ n = (sin d, cos d)
268 where
269 d = fromIntegral n
270
271 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
272 $ Map.lookup n
273 $ pcaReduceTo (Dimension 2)
274 $ mapArray labels m
275 where
276 to2d :: Vec.Vector Double -> (Double, Double)
277 to2d v = (x',y')
278 where
279 ds = take 2 $ Vec.toList v
280 x' = head' "to2d" ds
281 y' = last' "to2d" ds
282
283 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
284 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
285 where
286 ns = map snd items
287
288 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
289 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
290 ------------------------------------------------------------------------
291
292 -- | KamadaKawai Layout
293 -- TODO TEST: check labels, nodeId and coordinates
294 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
295 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
296 where
297 coord :: (Map Int (Double,Double))
298 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
299 --p = Layout.defaultLGL
300 p = Layout.kamadaKawai
301 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
302
303 -----------------------------------------------------------------------------
304 -- MISC Tools
305 cooc2graph'' :: Ord t => Distance
306 -> Double
307 -> Map (t, t) Int
308 -> Map (Index, Index) Double
309 cooc2graph'' distance threshold myCooc = neighbourMap
310 where
311 (ti, _) = createIndices myCooc
312 myCooc' = toIndex ti myCooc
313 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
314 distanceMat = measure distance matCooc
315 neighbourMap = filterByNeighbours threshold
316 $ mat2map distanceMat
317
318 -- Quentin
319 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
320 filterByNeighbours threshold distanceMap = filteredMap
321 where
322 indexes :: [Index]
323 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
324 filteredMap :: Map (Index, Index) Double
325 filteredMap = Map.fromList
326 $ List.concat
327 $ map (\idx ->
328 let selected = List.reverse
329 $ List.sortOn snd
330 $ Map.toList
331 $ Map.filter (> 0)
332 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
333 in List.take (round threshold) selected
334 ) indexes
335