]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
Merge branch 'dev' into 74-dev-frame-calc-csv-import
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Debug.Trace (trace)
22 import GHC.Float (sin, cos)
23 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
24 import Gargantext.Core.Methods.Distances (Distance(..), measure)
25 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
26 import Gargantext.Core.Statistics
27 import Gargantext.Core.Viz.Graph
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
31 import Gargantext.Core.Viz.Graph.Types (ClusterNode)
32 import Gargantext.Prelude
33 -- import qualified Graph.BAC.ProxemyOptim as BAC
34 import IGraph.Random -- (Gen(..))
35 import qualified Data.HashMap.Strict as HashMap
36 import qualified Data.List as List
37 import qualified Data.Map as Map
38 import qualified Data.Set as Set
39 import qualified Data.Vector.Storable as Vec
40 import qualified IGraph as Igraph
41 import qualified IGraph.Algorithms.Layout as Layout
42
43
44 -------------------------------------------------------------
45 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
46 -- defaultClustering x = pure $ BAC.defaultClustering x
47 defaultClustering x = spinglass 1 x
48
49 -------------------------------------------------------------
50 type Threshold = Double
51
52 cooc2graph' :: Ord t => Distance
53 -> Double
54 -> Map (t, t) Int
55 -> Map (Index, Index) Double
56 cooc2graph' distance threshold myCooc
57 = Map.filter (> threshold)
58 $ mat2map
59 $ measure distance
60 $ case distance of
61 Conditional -> map2mat Triangle 0 tiSize
62 Distributional -> map2mat Square 0 tiSize
63 $ Map.filter (> 1) myCooc'
64
65 where
66 (ti, _) = createIndices myCooc
67 tiSize = Map.size ti
68 myCooc' = toIndex ti myCooc
69
70
71 data PartitionMethod = Louvain | Spinglass -- | Bac
72
73 -- | coocurrences graph computation
74 cooc2graphWith :: PartitionMethod
75 -> Distance
76 -> Threshold
77 -> HashMap (NgramsTerm, NgramsTerm) Int
78 -> IO Graph
79 cooc2graphWith Louvain = undefined
80 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
81 -- cooc2graphWith Bac = cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
82
83
84 cooc2graphWith' :: ToComId a
85 => Partitions a
86 -> Distance
87 -> Threshold
88 -> HashMap (NgramsTerm, NgramsTerm) Int
89 -> IO Graph
90 cooc2graphWith' doPartitions distance threshold myCooc = do
91 let
92 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
93
94 nodesApprox :: Int
95 nodesApprox = n'
96 where
97 (as, bs) = List.unzip $ Map.keys distanceMap
98 n' = Set.size $ Set.fromList $ as <> bs
99 ClustersParams rivers _level = clustersParams nodesApprox
100
101 {- -- Debug
102 saveAsFileDebug "debug/distanceMap" distanceMap
103 printDebug "similarities" similarities
104 -}
105
106 partitions <- if (Map.size distanceMap > 0)
107 then doPartitions distanceMap
108 else panic "Text.Flow: DistanceMap is empty"
109
110 let
111 -- bridgeness' = distanceMap
112 bridgeness' = trace ("Rivers: " <> show rivers)
113 $ bridgeness rivers partitions distanceMap
114
115 confluence' = confluence (Map.keys bridgeness') 3 True False
116
117 pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
118 diag bridgeness' confluence' partitions
119
120
121
122 doDistanceMap :: Distance
123 -> Threshold
124 -> HashMap (NgramsTerm, NgramsTerm) Int
125 -> ( Map (Int,Int) Double
126 , Map (Index, Index) Int
127 , Map NgramsTerm Index
128 )
129 doDistanceMap distance threshold myCooc = (distanceMap, toIndex ti diag, ti)
130 where
131 -- TODO remove below
132 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
133 $ Map.fromList
134 $ HashMap.toList myCooc
135
136 (ti, _it) = createIndices theMatrix
137 tiSize = Map.size ti
138
139 matCooc = case distance of -- Shape of the Matrix
140 Conditional -> map2mat Triangle 0 tiSize
141 Distributional -> map2mat Square 0 tiSize
142 {-$ case distance of -- Removing the Diagonal ?
143 Conditional -> Map.filterWithKey (\(a,b) _ -> a /= b)
144 Distributional -> Map.filterWithKey (\(a,b) _ -> a /= b)
145 -}
146 $ toIndex ti theMatrix
147
148 similarities = measure distance matCooc
149 links = round (let n :: Double = fromIntegral tiSize in n * log n)
150
151 distanceMap = Map.fromList
152 $ List.take links
153 $ List.sortOn snd
154 $ Map.toList
155 $ Map.filter (> threshold)
156 $ mat2map similarities
157
158
159
160
161 ------------------------------------------------------------------------
162 ------------------------------------------------------------------------
163 data ClustersParams = ClustersParams { bridgness :: Double
164 , louvain :: Text
165 } deriving (Show)
166
167 clustersParams :: Int -> ClustersParams
168 clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
169 {- where
170 y | x < 100 = "0.000001"
171 | x < 350 = "0.000001"
172 | x < 500 = "0.000001"
173 | x < 1000 = "0.000001"
174 | otherwise = "1"
175 -}
176
177 ----------------------------------------------------------
178 -- | From data to Graph
179
180 type Occurrences = Map (Int, Int) Int
181
182 data2graph :: ToComId a
183 => [(Text, Int)]
184 -> Occurrences
185 -> Map (Int, Int) Double
186 -> Map (Int, Int) Double
187 -> [a]
188 -> Graph
189 data2graph labels occurences bridge conf partitions = Graph { _graph_nodes = nodes
190 , _graph_edges = edges
191 , _graph_metadata = Nothing }
192 where
193
194 community_id_by_node_id = Map.fromList
195 $ map nodeId2comId partitions
196
197 nodes = map (setCoord ForceAtlas labels bridge)
198 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
199 , node_type = Terms -- or Unknown
200 , node_id = cs (show n)
201 , node_label = l
202 , node_x_coord = 0
203 , node_y_coord = 0
204 , node_attributes =
205 Attributes { clust_default = maybe 0 identity
206 (Map.lookup n community_id_by_node_id) }
207 , node_children = [] }
208 )
209 | (l, n) <- labels
210 , Set.member n $ Set.fromList
211 $ List.concat
212 $ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
213 $ Map.toList bridge
214 ]
215
216 edges = [ Edge { edge_source = cs (show s)
217 , edge_target = cs (show t)
218 , edge_weight = d
219 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
220 -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
221 , edge_id = cs (show i)
222 }
223 | (i, ((s,t), d)) <- zip ([0..]::[Integer] )
224 (Map.toList bridge)
225 , s /= t, d > 0
226 ]
227
228
229 ------------------------------------------------------------------------
230
231 data Layout = KamadaKawai | ACP | ForceAtlas
232
233
234 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
235 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
236 where
237 (x,y) = f i
238
239
240 -- | ACP
241 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
242 setCoord l labels m (n,node) = node { node_x_coord = x
243 , node_y_coord = y
244 }
245 where
246 (x,y) = getCoord l labels m n
247
248
249 getCoord :: Ord a
250 => Layout
251 -> [(a, Int)]
252 -> Map (Int, Int) Double
253 -> Int
254 -> (Double, Double)
255 getCoord KamadaKawai _ _m _n = undefined -- layout m n
256
257 getCoord ForceAtlas _ _ n = (sin d, cos d)
258 where
259 d = fromIntegral n
260
261 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
262 $ Map.lookup n
263 $ pcaReduceTo (Dimension 2)
264 $ mapArray labels m
265 where
266 to2d :: Vec.Vector Double -> (Double, Double)
267 to2d v = (x',y')
268 where
269 ds = take 2 $ Vec.toList v
270 x' = head' "to2d" ds
271 y' = last' "to2d" ds
272
273 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
274 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
275 where
276 ns = map snd items
277
278 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
279 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
280 ------------------------------------------------------------------------
281
282 -- | KamadaKawai Layout
283 -- TODO TEST: check labels, nodeId and coordinates
284 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
285 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
286 where
287 coord :: (Map Int (Double,Double))
288 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
289 --p = Layout.defaultLGL
290 p = Layout.kamadaKawai
291 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
292
293 -----------------------------------------------------------------------------
294 -- MISC Tools
295 cooc2graph'' :: Ord t => Distance
296 -> Double
297 -> Map (t, t) Int
298 -> Map (Index, Index) Double
299 cooc2graph'' distance threshold myCooc = neighbourMap
300 where
301 (ti, _) = createIndices myCooc
302 myCooc' = toIndex ti myCooc
303 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
304 distanceMat = measure distance matCooc
305 neighbourMap = filterByNeighbours threshold
306 $ mat2map distanceMat
307
308 -- Quentin
309 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
310 filterByNeighbours threshold distanceMap = filteredMap
311 where
312 indexes :: [Index]
313 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
314 filteredMap :: Map (Index, Index) Double
315 filteredMap = Map.fromList
316 $ List.concat
317 $ map (\idx ->
318 let selected = List.reverse
319 $ List.sortOn snd
320 $ Map.toList
321 $ Map.filter (> 0)
322 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
323 in List.take (round threshold) selected
324 ) indexes
325
326
327
328
329