]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
Merge remote-tracking branch 'origin/adinapoli/issue-185-job-api' into dev-merge
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, {-recursiveClustering,-} recursiveClustering', setNodes2clusterNodes)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
58
59 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
60 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
61 instance FromJSON BridgenessMethod
62 instance ToJSON BridgenessMethod
63 instance ToSchema BridgenessMethod
64 instance Arbitrary BridgenessMethod where
65 arbitrary = elements [ minBound .. maxBound ]
66
67
68 -------------------------------------------------------------
69 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
70 -- defaultClustering x = pure $ BAC.defaultClustering x
71 defaultClustering x = spinglass 1 x
72
73 -------------------------------------------------------------
74 type Threshold = Double
75
76
77 cooc2graph' :: Ord t => Similarity
78 -> Double
79 -> Map (t, t) Int
80 -> Map (Index, Index) Double
81 cooc2graph' distance threshold myCooc
82 = Map.filter (> threshold)
83 $ mat2map
84 $ measure distance
85 $ case distance of
86 Conditional -> map2mat Triangle 0 tiSize
87 Distributional -> map2mat Square 0 tiSize
88 $ Map.filter (> 1) myCooc'
89
90 where
91 (ti, _) = createIndices myCooc
92 tiSize = Map.size ti
93 myCooc' = toIndex ti myCooc
94
95
96
97 -- coocurrences graph computation
98 cooc2graphWith :: PartitionMethod
99 -> BridgenessMethod
100 -> MultiPartite
101 -> Similarity
102 -> Threshold
103 -> Strength
104 -> HashMap (NgramsTerm, NgramsTerm) Int
105 -> IO Graph
106 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
107 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
108 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
109 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
110 -- TODO: change these options, or make them configurable in UI?
111
112 cooc2graphWith' :: Partitions
113 -> BridgenessMethod
114 -> MultiPartite
115 -> Similarity
116 -> Threshold
117 -> Strength
118 -> HashMap (NgramsTerm, NgramsTerm) Int
119 -> IO Graph
120 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold strength myCooc = do
121 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
122 distanceMap `seq` diag `seq` ti `seq` return ()
123
124 partitions <- if (Map.size distanceMap > 0)
125 then recursiveClustering' (spinglass' 1) distanceMap
126 else panic $ Text.unlines [ "I can not compute the graph you request"
127 , "because either the quantity of documents"
128 , "or the quantity of terms"
129 , "are lacking. "
130 , "Solution: add more either Documents or Map Terms to your analysis. "
131 , "Follow the available tutorials on the Training EcoSystems. "
132 , "Ask your co-users of GarganText how to have access to it."
133 ]
134 length partitions `seq` return ()
135
136 let
137 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
138 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap
139
140 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
141
142 {-
143 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Distributional threshold strength myCooc = do
144 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
145 distanceMap `seq` diag `seq` ti `seq` return ()
146
147 partitions <- if (Map.size distanceMap > 0)
148 then recursiveClustering (spinglass 1) distanceMap
149 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
150 , "Maybe you should add more Map Terms in your list"
151 , "Tutorial: TODO"
152 ]
153 length partitions `seq` return ()
154
155 let
156 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
157 !bridgeness' = bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
158
159 pure $ data2graph multi ti diag bridgeness' confluence' partitions
160 -}
161
162
163
164 type Reverse = Bool
165
166 doSimilarityMap :: Similarity
167 -> Threshold
168 -> Strength
169 -> HashMap (NgramsTerm, NgramsTerm) Int
170 -> ( Map (Int,Int) Double
171 , Map (Index, Index) Int
172 , Map NgramsTerm Index
173 )
174
175 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
176 where
177 myCooc' = Map.fromList $ HashMap.toList myCooc
178
179 (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
180 $ Map.fromList
181 $ HashMap.toList myCooc
182
183 (ti, _it) = createIndices theMatrix
184 tiSize = Map.size ti
185
186 similarities = (\m -> m `seq` m)
187 $ (\m -> m `seq` measure Conditional m)
188 $ (\m -> m `seq` map2mat Square 0 tiSize m)
189 $ theMatrix `seq` toIndex ti theMatrix
190
191 links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
192 distanceMap = Map.fromList
193 $ List.take links
194 $ (if strength == Weak then List.reverse else identity)
195 $ List.sortOn snd
196 $ Map.toList
197 $ Map.filter (> threshold)
198 -- $ conditional myCooc
199 $ similarities `seq` mat2map similarities
200
201 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
202 where
203 -- TODO remove below
204 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
205 $ Map.fromList
206 $ HashMap.toList myCooc
207
208 (ti, _it) = createIndices theMatrix
209 tiSize = Map.size ti
210
211 similarities = (\m -> m `seq` m)
212 $ (\m -> m `seq` measure Distributional m)
213 $ (\m -> m `seq` map2mat Square 0 tiSize m)
214 $ theMatrix `seq` toIndex ti theMatrix
215
216 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
217
218 distanceMap = Map.fromList
219 $ List.take links
220 $ (if strength == Weak then List.reverse else identity)
221 $ List.sortOn snd
222 $ Map.toList
223 $ edgesFilter
224 $ (\m -> m `seq` Map.filter (> threshold) m)
225 $ similarities `seq` mat2map similarities
226
227 ----------------------------------------------------------
228 -- | From data to Graph
229 type Occurrences = Int
230
231 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
232 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
233 if HashSet.member t s1
234 then t1
235 else t2
236
237 data2graph :: MultiPartite
238 -> Map NgramsTerm Int
239 -> Map (Int, Int) Occurrences
240 -> Map (Int, Int) Double
241 -> Map (Int, Int) Double
242 -> [ClusterNode]
243 -> Graph
244 data2graph multi labels' occurences bridge conf partitions =
245 Graph { _graph_nodes = nodes
246 , _graph_edges = edges
247 , _graph_metadata = Nothing
248 }
249
250 where
251
252 nodes = map (setCoord ForceAtlas labels bridge)
253 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
254 , node_type = nodeTypeWith multi label
255 , node_id = (cs . show) n
256 , node_label = unNgramsTerm label
257 , node_x_coord = 0
258 , node_y_coord = 0
259 , node_attributes =
260 Attributes { clust_default = fromMaybe 0
261 (Map.lookup n community_id_by_node_id)
262 }
263 , node_children = []
264 }
265 )
266 | (label, n) <- labels
267 , Set.member n toKeep
268 ]
269
270 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
271
272 edges = [ Edge { edge_source = cs (show s)
273 , edge_hidden = Nothing
274 , edge_target = cs (show t)
275 , edge_weight = weight
276 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
277 , edge_id = cs (show i)
278 }
279 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
280 , s /= t
281 , weight > 0
282 ]
283
284 community_id_by_node_id = Map.fromList
285 $ map nodeId2comId partitions
286
287 labels = Map.toList labels'
288
289
290 ------------------------------------------------------------------------
291
292 data Layout = KamadaKawai | ACP | ForceAtlas
293
294
295 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
296 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
297 where
298 (x,y) = f i
299
300
301 -- | ACP
302 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
303 setCoord l labels m (n,node) = node { node_x_coord = x
304 , node_y_coord = y
305 }
306 where
307 (x,y) = getCoord l labels m n
308
309
310 getCoord :: Ord a
311 => Layout
312 -> [(a, Int)]
313 -> Map (Int, Int) Double
314 -> Int
315 -> (Double, Double)
316 getCoord KamadaKawai _ _m _n = undefined -- layout m n
317
318 getCoord ForceAtlas _ _ n = (sin d, cos d)
319 where
320 d = fromIntegral n
321
322 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
323 $ Map.lookup n
324 $ pcaReduceTo (Dimension 2)
325 $ mapArray labels m
326 where
327 to2d :: Vec.Vector Double -> (Double, Double)
328 to2d v = (x',y')
329 where
330 ds = take 2 $ Vec.toList v
331 x' = head' "to2d" ds
332 y' = last' "to2d" ds
333
334 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
335 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
336 where
337 ns = map snd items
338
339 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
340 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
341 ------------------------------------------------------------------------
342
343 -- | KamadaKawai Layout
344 -- TODO TEST: check labels, nodeId and coordinates
345 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
346 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
347 where
348 coord :: (Map Int (Double,Double))
349 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
350 --p = Layout.defaultLGL
351 p = Layout.kamadaKawai
352 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
353
354 -----------------------------------------------------------------------------
355 -- MISC Tools
356 cooc2graph'' :: Ord t => Similarity
357 -> Double
358 -> Map (t, t) Int
359 -> Map (Index, Index) Double
360 cooc2graph'' distance threshold myCooc = neighbourMap
361 where
362 (ti, _) = createIndices myCooc
363 myCooc' = toIndex ti myCooc
364 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
365 distanceMat = measure distance matCooc
366 neighbourMap = filterByNeighbours threshold
367 $ mat2map distanceMat
368
369 -- Quentin
370 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
371 filterByNeighbours threshold distanceMap = filteredMap
372 where
373 indexes :: [Index]
374 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
375 filteredMap :: Map (Index, Index) Double
376 filteredMap = Map.fromList
377 $ List.concat
378 $ map (\idx ->
379 let selected = List.reverse
380 $ List.sortOn snd
381 $ Map.toList
382 $ Map.filter (> 0)
383 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
384 in List.take (round threshold) selected
385 ) indexes