]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[zip] implement zip for parsers
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Vector.Storable as Vec
44 import qualified Graph.BAC.ProxemyOptim as BAC
45 import qualified IGraph as Igraph
46 import qualified IGraph.Algorithms.Layout as Layout
47
48
49 data PartitionMethod = Spinglass | Confluence
50 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
51 instance FromJSON PartitionMethod
52 instance ToJSON PartitionMethod
53 instance ToSchema PartitionMethod
54 instance Arbitrary PartitionMethod where
55 arbitrary = elements [ minBound .. maxBound ]
56
57
58 -------------------------------------------------------------
59 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
60 -- defaultClustering x = pure $ BAC.defaultClustering x
61 defaultClustering x = spinglass 1 x
62
63 -------------------------------------------------------------
64 type Threshold = Double
65
66
67 cooc2graph' :: Ord t => Distance
68 -> Double
69 -> Map (t, t) Int
70 -> Map (Index, Index) Double
71 cooc2graph' distance threshold myCooc
72 = Map.filter (> threshold)
73 $ mat2map
74 $ measure distance
75 $ case distance of
76 Conditional -> map2mat Triangle 0 tiSize
77 Distributional -> map2mat Square 0 tiSize
78 $ Map.filter (> 1) myCooc'
79
80 where
81 (ti, _) = createIndices myCooc
82 tiSize = Map.size ti
83 myCooc' = toIndex ti myCooc
84
85
86
87 -- coocurrences graph computation
88 cooc2graphWith :: PartitionMethod
89 -> Distance
90 -> Threshold
91 -> HashMap (NgramsTerm, NgramsTerm) Int
92 -> IO Graph
93 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
94 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
95
96
97 cooc2graphWith' :: ToComId a
98 => Partitions a
99 -> Distance
100 -> Threshold
101 -> HashMap (NgramsTerm, NgramsTerm) Int
102 -> IO Graph
103 cooc2graphWith' doPartitions distance threshold myCooc = do
104 let
105 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
106
107 {- -- Debug
108 saveAsFileDebug "debug/distanceMap" distanceMap
109 printDebug "similarities" similarities
110 -}
111
112 partitions <- if (Map.size distanceMap > 0)
113 then doPartitions distanceMap
114 else panic "Text.Flow: DistanceMap is empty"
115
116 let
117 nodesApprox :: Int
118 nodesApprox = n'
119 where
120 (as, bs) = List.unzip $ Map.keys distanceMap
121 n' = Set.size $ Set.fromList $ as <> bs
122 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
123 confluence' = confluence (Map.keys bridgeness') 3 True False
124
125 pure $ data2graph ti diag bridgeness' confluence' partitions
126
127
128 doDistanceMap :: Distance
129 -> Threshold
130 -> HashMap (NgramsTerm, NgramsTerm) Int
131 -> ( Map (Int,Int) Double
132 , Map (Index, Index) Int
133 , Map NgramsTerm Index
134 )
135 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
136 where
137 -- TODO remove below
138 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
139 $ Map.fromList
140 $ HashMap.toList myCooc
141
142 (ti, _it) = createIndices theMatrix
143 tiSize = Map.size ti
144
145 {-
146 matCooc = case distance of -- Shape of the Matrix
147 Conditional -> map2mat Triangle 0 tiSize
148 Distributional -> map2mat Square 0 tiSize
149 $ toIndex ti theMatrix
150 similarities = measure distance matCooc
151 -}
152
153 similarities = measure Distributional
154 $ map2mat Square 0 tiSize
155 $ toIndex ti theMatrix
156
157 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
158
159 distanceMap = Map.fromList
160 $ List.take links
161 $ List.reverse
162 $ List.sortOn snd
163 $ Map.toList
164 $ edgesFilter
165 $ Map.filter (> threshold)
166 $ mat2map similarities
167
168 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
169 where
170 myCooc' = Map.fromList $ HashMap.toList myCooc
171 (ti, _it) = createIndices myCooc'
172 tiSize = Map.size ti
173
174 links = round (let n :: Double = fromIntegral tiSize in n * log n)
175
176 distanceMap = toIndex ti
177 $ Map.fromList
178 $ List.take links
179 $ List.sortOn snd
180 $ HashMap.toList
181 $ HashMap.filter (> threshold)
182 $ conditional myCooc
183
184 ----------------------------------------------------------
185 -- | From data to Graph
186
187 type Occurrences = Int
188
189 data2graph :: ToComId a
190 => Map NgramsTerm Int
191 -> Map (Int, Int) Occurrences
192 -> Map (Int, Int) Double
193 -> Map (Int, Int) Double
194 -> [a]
195 -> Graph
196 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
197 , _graph_edges = edges
198 , _graph_metadata = Nothing
199 }
200 where
201
202 nodes = map (setCoord ForceAtlas labels bridge)
203 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
204 , node_type = Terms -- or Unknown
205 , node_id = cs (show n)
206 , node_label = unNgramsTerm l
207 , node_x_coord = 0
208 , node_y_coord = 0
209 , node_attributes = Attributes { clust_default = fromMaybe 0
210 (Map.lookup n community_id_by_node_id)
211 }
212 , node_children = [] }
213 )
214 | (l, n) <- labels
215 , Set.member n nodesWithScores
216 ]
217
218 edges = [ Edge { edge_source = cs (show s)
219 , edge_target = cs (show t)
220 , edge_weight = weight
221 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
222 , edge_id = cs (show i)
223 }
224 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
225 , s /= t
226 , weight > 0
227 ]
228
229 community_id_by_node_id = Map.fromList
230 $ map nodeId2comId partitions
231
232 labels = Map.toList labels'
233
234 nodesWithScores = Set.fromList
235 $ List.concat
236 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
237 $ Map.toList bridge
238
239
240 ------------------------------------------------------------------------
241
242 data Layout = KamadaKawai | ACP | ForceAtlas
243
244
245 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
246 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
247 where
248 (x,y) = f i
249
250
251 -- | ACP
252 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
253 setCoord l labels m (n,node) = node { node_x_coord = x
254 , node_y_coord = y
255 }
256 where
257 (x,y) = getCoord l labels m n
258
259
260 getCoord :: Ord a
261 => Layout
262 -> [(a, Int)]
263 -> Map (Int, Int) Double
264 -> Int
265 -> (Double, Double)
266 getCoord KamadaKawai _ _m _n = undefined -- layout m n
267
268 getCoord ForceAtlas _ _ n = (sin d, cos d)
269 where
270 d = fromIntegral n
271
272 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
273 $ Map.lookup n
274 $ pcaReduceTo (Dimension 2)
275 $ mapArray labels m
276 where
277 to2d :: Vec.Vector Double -> (Double, Double)
278 to2d v = (x',y')
279 where
280 ds = take 2 $ Vec.toList v
281 x' = head' "to2d" ds
282 y' = last' "to2d" ds
283
284 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
285 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
286 where
287 ns = map snd items
288
289 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
290 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
291 ------------------------------------------------------------------------
292
293 -- | KamadaKawai Layout
294 -- TODO TEST: check labels, nodeId and coordinates
295 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
296 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
297 where
298 coord :: (Map Int (Double,Double))
299 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
300 --p = Layout.defaultLGL
301 p = Layout.kamadaKawai
302 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
303
304 -----------------------------------------------------------------------------
305 -- MISC Tools
306 cooc2graph'' :: Ord t => Distance
307 -> Double
308 -> Map (t, t) Int
309 -> Map (Index, Index) Double
310 cooc2graph'' distance threshold myCooc = neighbourMap
311 where
312 (ti, _) = createIndices myCooc
313 myCooc' = toIndex ti myCooc
314 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
315 distanceMat = measure distance matCooc
316 neighbourMap = filterByNeighbours threshold
317 $ mat2map distanceMat
318
319 -- Quentin
320 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
321 filterByNeighbours threshold distanceMap = filteredMap
322 where
323 indexes :: [Index]
324 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
325 filteredMap :: Map (Index, Index) Double
326 filteredMap = Map.fromList
327 $ List.concat
328 $ map (\idx ->
329 let selected = List.reverse
330 $ List.sortOn snd
331 $ Map.toList
332 $ Map.filter (> 0)
333 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
334 in List.take (round threshold) selected
335 ) indexes
336
337
338
339
340