]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
Merge remote-tracking branch 'origin/dev-merge-nix-2' into dev-merge
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Debug.Trace
18
19 import Data.Aeson
20 import Data.HashMap.Strict (HashMap)
21 import Data.Map (Map)
22 import Data.Maybe (fromMaybe)
23 import Data.Swagger hiding (items)
24 import GHC.Float (sin, cos)
25 import GHC.Generics (Generic)
26 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
27 import Gargantext.Core.Methods.Distances (Distance(..), measure)
28 import Gargantext.Core.Methods.Distances.Conditional (conditional)
29 -- import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
30 import Gargantext.Core.Statistics
31 import Gargantext.Core.Viz.Graph
32 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
33 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
34 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
35 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
36 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
37 import Gargantext.Prelude
38 import Graph.Types (ClusterNode)
39 import IGraph.Random -- (Gen(..))
40 import Test.QuickCheck (elements)
41 import Test.QuickCheck.Arbitrary
42 import qualified Data.HashMap.Strict as HashMap
43 import qualified Data.List as List
44 import qualified Data.Map as Map
45 import qualified Data.Set as Set
46 import qualified Data.Text as Text
47 import qualified Data.Vector.Storable as Vec
48 import qualified Graph.BAC.ProxemyOptim as BAC
49 import qualified IGraph as Igraph
50 import qualified IGraph.Algorithms.Layout as Layout
51
52
53 data PartitionMethod = Spinglass | Confluence | Infomap
54 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
55 instance FromJSON PartitionMethod
56 instance ToJSON PartitionMethod
57 instance ToSchema PartitionMethod
58 instance Arbitrary PartitionMethod where
59 arbitrary = elements [ minBound .. maxBound ]
60
61
62 -------------------------------------------------------------
63 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
64 -- defaultClustering x = pure $ BAC.defaultClustering x
65 defaultClustering x = spinglass 1 x
66
67 -------------------------------------------------------------
68 type Threshold = Double
69
70
71 cooc2graph' :: Ord t => Distance
72 -> Double
73 -> Map (t, t) Int
74 -> Map (Index, Index) Double
75 cooc2graph' distance threshold myCooc
76 = Map.filter (> threshold)
77 $ mat2map
78 $ measure distance
79 $ case distance of
80 Conditional -> map2mat Triangle 0 tiSize
81 Distributional -> map2mat Square 0 tiSize
82 $ Map.filter (> 1) myCooc'
83
84 where
85 (ti, _) = createIndices myCooc
86 tiSize = Map.size ti
87 myCooc' = toIndex ti myCooc
88
89
90
91 -- coocurrences graph computation
92 cooc2graphWith :: PartitionMethod
93 -> Distance
94 -> Threshold
95 -> Strength
96 -> HashMap (NgramsTerm, NgramsTerm) Int
97 -> IO Graph
98 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
99 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
100 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
101 -- TODO: change these options, or make them configurable in UI?
102
103
104 cooc2graphWith' :: ToComId a
105 => Partitions a
106 -> Distance
107 -> Threshold
108 -> Strength
109 -> HashMap (NgramsTerm, NgramsTerm) Int
110 -> IO Graph
111 cooc2graphWith' doPartitions distance threshold strength myCooc = do
112 let (distanceMap, diag, ti) = doDistanceMap distance threshold strength myCooc
113 distanceMap `seq` trace "distanceMap OK" diag `seq` trace "diag OK" ti `seq` printDebug "ti done" ()
114
115 --{- -- Debug
116 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
117 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
118 -- printDebug "similarities" similarities
119 --}
120
121 partitions <- if (Map.size distanceMap > 0)
122 then doPartitions distanceMap
123 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
124 , "Maybe you should add more Map Terms in your list"
125 , "Tutorial: link todo"
126 ]
127 partitions `seq` printDebug "partitions done" ()
128 let
129 nodesApprox :: Int
130 nodesApprox = n'
131 where
132 (as, bs) = List.unzip $ Map.keys distanceMap
133 n' = Set.size $ Set.fromList $ as <> bs
134 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
135 confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
136 -- confluence (Map.keys bridgeness') 3 True False
137 seq bridgeness' $ printDebug "bridgeness OK" ()
138 seq confluence' $ printDebug "confluence OK" ()
139 pure $ data2graph ti diag bridgeness' confluence' partitions
140
141 type Reverse = Bool
142
143 doDistanceMap :: Distance
144 -> Threshold
145 -> Strength
146 -> HashMap (NgramsTerm, NgramsTerm) Int
147 -> ( Map (Int,Int) Double
148 , Map (Index, Index) Int
149 , Map NgramsTerm Index
150 )
151 doDistanceMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
152 where
153 -- TODO remove below
154 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
155 $ Map.fromList
156 $ HashMap.toList myCooc
157
158 (ti, _it) = createIndices theMatrix
159 tiSize = Map.size ti
160
161 similarities = (\m -> m `seq` trace "measure done" m)
162 $ (\m -> m `seq` trace "map2mat done" (measure Distributional m))
163 $ (\m -> m `seq` trace "toIndex done" (map2mat Square 0 tiSize m))
164 $ theMatrix `seq` trace "theMatrix done" (toIndex ti theMatrix)
165
166 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
167
168 distanceMap = Map.fromList . trace "fromList" identity
169 $ List.take links
170 $ (if strength == Weak then List.reverse else identity)
171 $ List.sortOn snd
172 $ Map.toList
173 $ edgesFilter
174 $ (\m -> m `seq` trace "map2map done" (Map.filter (> threshold) m))
175 $ similarities `seq` mat2map (trace "similarities done" similarities)
176
177 doDistanceMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
178 where
179 myCooc' = Map.fromList $ HashMap.toList myCooc
180 (ti, _it) = createIndices myCooc'
181
182 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
183
184 distanceMap = toIndex ti
185 $ Map.fromList
186 $ List.take links
187 $ (if strength == Weak then List.reverse else identity)
188 $ List.sortOn snd
189 $ HashMap.toList
190 $ HashMap.filter (> threshold)
191 $ conditional myCooc
192
193 ----------------------------------------------------------
194 -- | From data to Graph
195
196 type Occurrences = Int
197
198 data2graph :: ToComId a
199 => Map NgramsTerm Int
200 -> Map (Int, Int) Occurrences
201 -> Map (Int, Int) Double
202 -> Map (Int, Int) Double
203 -> [a]
204 -> Graph
205 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
206 , _graph_edges = edges
207 , _graph_metadata = Nothing
208 }
209 where
210
211 nodes = map (setCoord ForceAtlas labels bridge)
212 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
213 , node_type = Terms -- or Unknown
214 , node_id = cs (show n)
215 , node_label = unNgramsTerm l
216 , node_x_coord = 0
217 , node_y_coord = 0
218 , node_attributes = Attributes { clust_default = fromMaybe 0
219 (Map.lookup n community_id_by_node_id)
220 }
221 , node_children = []
222 }
223 )
224 | (l, n) <- labels
225 , Set.member n nodesWithScores
226 ]
227
228 edges = [ Edge { edge_source = cs (show s)
229 , edge_target = cs (show t)
230 , edge_weight = weight
231 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
232 , edge_id = cs (show i)
233 }
234 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
235 , s /= t
236 , weight > 0
237 ]
238
239 community_id_by_node_id = Map.fromList
240 $ map nodeId2comId partitions
241
242 labels = Map.toList labels'
243
244 nodesWithScores = Set.fromList
245 $ List.concat
246 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
247 $ Map.toList bridge
248
249
250 ------------------------------------------------------------------------
251
252 data Layout = KamadaKawai | ACP | ForceAtlas
253
254
255 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
256 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
257 where
258 (x,y) = f i
259
260
261 -- | ACP
262 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
263 setCoord l labels m (n,node) = node { node_x_coord = x
264 , node_y_coord = y
265 }
266 where
267 (x,y) = getCoord l labels m n
268
269
270 getCoord :: Ord a
271 => Layout
272 -> [(a, Int)]
273 -> Map (Int, Int) Double
274 -> Int
275 -> (Double, Double)
276 getCoord KamadaKawai _ _m _n = undefined -- layout m n
277
278 getCoord ForceAtlas _ _ n = (sin d, cos d)
279 where
280 d = fromIntegral n
281
282 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
283 $ Map.lookup n
284 $ pcaReduceTo (Dimension 2)
285 $ mapArray labels m
286 where
287 to2d :: Vec.Vector Double -> (Double, Double)
288 to2d v = (x',y')
289 where
290 ds = take 2 $ Vec.toList v
291 x' = head' "to2d" ds
292 y' = last' "to2d" ds
293
294 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
295 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
296 where
297 ns = map snd items
298
299 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
300 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
301 ------------------------------------------------------------------------
302
303 -- | KamadaKawai Layout
304 -- TODO TEST: check labels, nodeId and coordinates
305 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
306 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
307 where
308 coord :: (Map Int (Double,Double))
309 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
310 --p = Layout.defaultLGL
311 p = Layout.kamadaKawai
312 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
313
314 -----------------------------------------------------------------------------
315 -- MISC Tools
316 cooc2graph'' :: Ord t => Distance
317 -> Double
318 -> Map (t, t) Int
319 -> Map (Index, Index) Double
320 cooc2graph'' distance threshold myCooc = neighbourMap
321 where
322 (ti, _) = createIndices myCooc
323 myCooc' = toIndex ti myCooc
324 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
325 distanceMat = measure distance matCooc
326 neighbourMap = filterByNeighbours threshold
327 $ mat2map distanceMat
328
329 -- Quentin
330 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
331 filterByNeighbours threshold distanceMap = filteredMap
332 where
333 indexes :: [Index]
334 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
335 filteredMap :: Map (Index, Index) Double
336 filteredMap = Map.fromList
337 $ List.concat
338 $ map (\idx ->
339 let selected = List.reverse
340 $ List.sortOn snd
341 $ Map.toList
342 $ Map.filter (> 0)
343 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
344 in List.take (round threshold) selected
345 ) indexes
346
347
348