]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
Merge remote-tracking branch 'origin/415-dev-user-empty-field' into dev
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.Text as Text
45 import qualified Data.Vector.Storable as Vec
46 import qualified Graph.BAC.ProxemyOptim as BAC
47 import qualified IGraph as Igraph
48 import qualified IGraph.Algorithms.Layout as Layout
49
50
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
58
59
60 -------------------------------------------------------------
61 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
62 -- defaultClustering x = pure $ BAC.defaultClustering x
63 defaultClustering x = spinglass 1 x
64
65 -------------------------------------------------------------
66 type Threshold = Double
67
68
69 cooc2graph' :: Ord t => Distance
70 -> Double
71 -> Map (t, t) Int
72 -> Map (Index, Index) Double
73 cooc2graph' distance threshold myCooc
74 = Map.filter (> threshold)
75 $ mat2map
76 $ measure distance
77 $ case distance of
78 Conditional -> map2mat Triangle 0 tiSize
79 Distributional -> map2mat Square 0 tiSize
80 $ Map.filter (> 1) myCooc'
81
82 where
83 (ti, _) = createIndices myCooc
84 tiSize = Map.size ti
85 myCooc' = toIndex ti myCooc
86
87
88
89 -- coocurrences graph computation
90 cooc2graphWith :: PartitionMethod
91 -> Distance
92 -> Threshold
93 -> HashMap (NgramsTerm, NgramsTerm) Int
94 -> IO Graph
95 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
96 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
97 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
98 -- TODO: change these options, or make them configurable in UI?
99
100
101 cooc2graphWith' :: ToComId a
102 => Partitions a
103 -> Distance
104 -> Threshold
105 -> HashMap (NgramsTerm, NgramsTerm) Int
106 -> IO Graph
107 cooc2graphWith' doPartitions distance threshold myCooc = do
108 let
109 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
110
111 --{- -- Debug
112 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
113 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
114 -- printDebug "similarities" similarities
115 --}
116
117 partitions <- if (Map.size distanceMap > 0)
118 then doPartitions distanceMap
119 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
120 , "Maybe you should add more Map Terms in your list"
121 , "Tutorial: link todo"
122 ]
123
124 let
125 nodesApprox :: Int
126 nodesApprox = n'
127 where
128 (as, bs) = List.unzip $ Map.keys distanceMap
129 n' = Set.size $ Set.fromList $ as <> bs
130 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
131 confluence' = confluence (Map.keys bridgeness') 3 True False
132
133 pure $ data2graph ti diag bridgeness' confluence' partitions
134
135
136 doDistanceMap :: Distance
137 -> Threshold
138 -> HashMap (NgramsTerm, NgramsTerm) Int
139 -> ( Map (Int,Int) Double
140 , Map (Index, Index) Int
141 , Map NgramsTerm Index
142 )
143 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
144 where
145 -- TODO remove below
146 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
147 $ Map.fromList
148 $ HashMap.toList myCooc
149
150 (ti, _it) = createIndices theMatrix
151 tiSize = Map.size ti
152
153 similarities = measure Distributional
154 $ map2mat Square 0 tiSize
155 $ toIndex ti theMatrix
156
157 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
158
159 distanceMap = Map.fromList
160 $ List.take links
161 $ List.reverse
162 $ List.sortOn snd
163 $ Map.toList
164 $ edgesFilter
165 $ Map.filter (> threshold)
166 $ mat2map similarities
167
168 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
169 where
170 myCooc' = Map.fromList $ HashMap.toList myCooc
171 (ti, _it) = createIndices myCooc'
172
173 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
174
175 distanceMap = toIndex ti
176 $ Map.fromList
177 $ List.take links
178 $ List.reverse
179 $ List.sortOn snd
180 $ HashMap.toList
181 $ HashMap.filter (> threshold)
182 $ conditional myCooc
183
184 ----------------------------------------------------------
185 -- | From data to Graph
186
187 type Occurrences = Int
188
189 data2graph :: ToComId a
190 => Map NgramsTerm Int
191 -> Map (Int, Int) Occurrences
192 -> Map (Int, Int) Double
193 -> Map (Int, Int) Double
194 -> [a]
195 -> Graph
196 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
197 , _graph_edges = edges
198 , _graph_metadata = Nothing
199 }
200 where
201
202 nodes = map (setCoord ForceAtlas labels bridge)
203 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
204 , node_type = Terms -- or Unknown
205 , node_id = cs (show n)
206 , node_label = unNgramsTerm l
207 , node_x_coord = 0
208 , node_y_coord = 0
209 , node_attributes = Attributes { clust_default = fromMaybe 0
210 (Map.lookup n community_id_by_node_id)
211 }
212 , node_children = []
213 }
214 )
215 | (l, n) <- labels
216 , Set.member n nodesWithScores
217 ]
218
219 edges = [ Edge { edge_source = cs (show s)
220 , edge_target = cs (show t)
221 , edge_weight = weight
222 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
223 , edge_id = cs (show i)
224 }
225 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
226 , s /= t
227 , weight > 0
228 ]
229
230 community_id_by_node_id = Map.fromList
231 $ map nodeId2comId partitions
232
233 labels = Map.toList labels'
234
235 nodesWithScores = Set.fromList
236 $ List.concat
237 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
238 $ Map.toList bridge
239
240
241 ------------------------------------------------------------------------
242
243 data Layout = KamadaKawai | ACP | ForceAtlas
244
245
246 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
247 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
248 where
249 (x,y) = f i
250
251
252 -- | ACP
253 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
254 setCoord l labels m (n,node) = node { node_x_coord = x
255 , node_y_coord = y
256 }
257 where
258 (x,y) = getCoord l labels m n
259
260
261 getCoord :: Ord a
262 => Layout
263 -> [(a, Int)]
264 -> Map (Int, Int) Double
265 -> Int
266 -> (Double, Double)
267 getCoord KamadaKawai _ _m _n = undefined -- layout m n
268
269 getCoord ForceAtlas _ _ n = (sin d, cos d)
270 where
271 d = fromIntegral n
272
273 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
274 $ Map.lookup n
275 $ pcaReduceTo (Dimension 2)
276 $ mapArray labels m
277 where
278 to2d :: Vec.Vector Double -> (Double, Double)
279 to2d v = (x',y')
280 where
281 ds = take 2 $ Vec.toList v
282 x' = head' "to2d" ds
283 y' = last' "to2d" ds
284
285 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
286 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
287 where
288 ns = map snd items
289
290 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
291 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
292 ------------------------------------------------------------------------
293
294 -- | KamadaKawai Layout
295 -- TODO TEST: check labels, nodeId and coordinates
296 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
297 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
298 where
299 coord :: (Map Int (Double,Double))
300 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
301 --p = Layout.defaultLGL
302 p = Layout.kamadaKawai
303 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
304
305 -----------------------------------------------------------------------------
306 -- MISC Tools
307 cooc2graph'' :: Ord t => Distance
308 -> Double
309 -> Map (t, t) Int
310 -> Map (Index, Index) Double
311 cooc2graph'' distance threshold myCooc = neighbourMap
312 where
313 (ti, _) = createIndices myCooc
314 myCooc' = toIndex ti myCooc
315 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
316 distanceMat = measure distance matCooc
317 neighbourMap = filterByNeighbours threshold
318 $ mat2map distanceMat
319
320 -- Quentin
321 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
322 filterByNeighbours threshold distanceMap = filteredMap
323 where
324 indexes :: [Index]
325 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
326 filteredMap :: Map (Index, Index) Double
327 filteredMap = Map.fromList
328 $ List.concat
329 $ map (\idx ->
330 let selected = List.reverse
331 $ List.sortOn snd
332 $ Map.toList
333 $ Map.filter (> 0)
334 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
335 in List.take (round threshold) selected
336 ) indexes
337
338
339
340
341