]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[facet] some fixes to tfidfAll, small refactoring
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, {-recursiveClustering,-} recursiveClustering', setNodes2clusterNodes)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map.Strict as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51 data PartitionMethod = Spinglass | Confluence | Infomap
52 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
53 instance FromJSON PartitionMethod
54 instance ToJSON PartitionMethod
55 instance ToSchema PartitionMethod
56 instance Arbitrary PartitionMethod where
57 arbitrary = elements [ minBound .. maxBound ]
58
59 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
60 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
61 instance FromJSON BridgenessMethod
62 instance ToJSON BridgenessMethod
63 instance ToSchema BridgenessMethod
64 instance Arbitrary BridgenessMethod where
65 arbitrary = elements [ minBound .. maxBound ]
66
67
68 -------------------------------------------------------------
69 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
70 -- defaultClustering x = pure $ BAC.defaultClustering x
71 defaultClustering x = spinglass 1 x
72
73 -------------------------------------------------------------
74 type Threshold = Double
75
76
77 cooc2graph' :: Ord t => Similarity
78 -> Double
79 -> Map (t, t) Int
80 -> Map (Index, Index) Double
81 cooc2graph' distance threshold myCooc
82 = Map.filter (> threshold)
83 $ mat2map
84 $ measure distance
85 $ case distance of
86 Conditional -> map2mat Triangle 0 tiSize
87 Distributional -> map2mat Square 0 tiSize
88 $ Map.filter (> 1) myCooc'
89
90 where
91 (ti, _) = createIndices myCooc
92 tiSize = Map.size ti
93 myCooc' = toIndex ti myCooc
94
95
96
97 -- coocurrences graph computation
98 cooc2graphWith :: PartitionMethod
99 -> BridgenessMethod
100 -> MultiPartite
101 -> Similarity
102 -> Threshold
103 -> Strength
104 -> HashMap (NgramsTerm, NgramsTerm) Int
105 -> IO Graph
106 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
107 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
108 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
109 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
110 -- TODO: change these options, or make them configurable in UI?
111
112 cooc2graphWith' :: Partitions
113 -> BridgenessMethod
114 -> MultiPartite
115 -> Similarity
116 -> Threshold
117 -> Strength
118 -> HashMap (NgramsTerm, NgramsTerm) Int
119 -> IO Graph
120 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold strength myCooc = do
121 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
122 distanceMap `seq` diag `seq` ti `seq` return ()
123
124 partitions <- if (Map.size distanceMap > 0)
125 then recursiveClustering' (spinglass' 1) distanceMap
126 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
127 , "Maybe you should add more Map Terms in your list"
128 , "Tutorial: TODO"
129 ]
130 length partitions `seq` return ()
131
132 let
133 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
134 !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap
135
136 pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
137
138 {-
139 cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Distributional threshold strength myCooc = do
140 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
141 distanceMap `seq` diag `seq` ti `seq` return ()
142
143 partitions <- if (Map.size distanceMap > 0)
144 then recursiveClustering (spinglass 1) distanceMap
145 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
146 , "Maybe you should add more Map Terms in your list"
147 , "Tutorial: TODO"
148 ]
149 length partitions `seq` return ()
150
151 let
152 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
153 !bridgeness' = bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
154
155 pure $ data2graph multi ti diag bridgeness' confluence' partitions
156 -}
157
158
159
160 type Reverse = Bool
161
162 doSimilarityMap :: Similarity
163 -> Threshold
164 -> Strength
165 -> HashMap (NgramsTerm, NgramsTerm) Int
166 -> ( Map (Int,Int) Double
167 , Map (Index, Index) Int
168 , Map NgramsTerm Index
169 )
170
171 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
172 where
173 myCooc' = Map.fromList $ HashMap.toList myCooc
174
175 (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
176 $ Map.fromList
177 $ HashMap.toList myCooc
178
179 (ti, _it) = createIndices theMatrix
180 tiSize = Map.size ti
181
182 similarities = (\m -> m `seq` m)
183 $ (\m -> m `seq` measure Conditional m)
184 $ (\m -> m `seq` map2mat Square 0 tiSize m)
185 $ theMatrix `seq` toIndex ti theMatrix
186
187 links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
188 distanceMap = Map.fromList
189 $ List.take links
190 $ (if strength == Weak then List.reverse else identity)
191 $ List.sortOn snd
192 $ Map.toList
193 $ Map.filter (> threshold)
194 -- $ conditional myCooc
195 $ similarities `seq` mat2map similarities
196
197 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
198 where
199 -- TODO remove below
200 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
201 $ Map.fromList
202 $ HashMap.toList myCooc
203
204 (ti, _it) = createIndices theMatrix
205 tiSize = Map.size ti
206
207 similarities = (\m -> m `seq` m)
208 $ (\m -> m `seq` measure Distributional m)
209 $ (\m -> m `seq` map2mat Square 0 tiSize m)
210 $ theMatrix `seq` toIndex ti theMatrix
211
212 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
213
214 distanceMap = Map.fromList
215 $ List.take links
216 $ (if strength == Weak then List.reverse else identity)
217 $ List.sortOn snd
218 $ Map.toList
219 $ edgesFilter
220 $ (\m -> m `seq` Map.filter (> threshold) m)
221 $ similarities `seq` mat2map similarities
222
223 ----------------------------------------------------------
224 -- | From data to Graph
225 type Occurrences = Int
226
227 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
228 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
229 if HashSet.member t s1
230 then t1
231 else t2
232
233 data2graph :: MultiPartite
234 -> Map NgramsTerm Int
235 -> Map (Int, Int) Occurrences
236 -> Map (Int, Int) Double
237 -> Map (Int, Int) Double
238 -> [ClusterNode]
239 -> Graph
240 data2graph multi labels' occurences bridge conf partitions =
241 Graph { _graph_nodes = nodes
242 , _graph_edges = edges
243 , _graph_metadata = Nothing
244 }
245
246 where
247
248 nodes = map (setCoord ForceAtlas labels bridge)
249 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
250 , node_type = nodeTypeWith multi label
251 , node_id = (cs . show) n
252 , node_label = unNgramsTerm label
253 , node_x_coord = 0
254 , node_y_coord = 0
255 , node_attributes =
256 Attributes { clust_default = fromMaybe 0
257 (Map.lookup n community_id_by_node_id)
258 }
259 , node_children = []
260 }
261 )
262 | (label, n) <- labels
263 , Set.member n toKeep
264 ]
265
266 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
267
268 edges = [ Edge { edge_source = cs (show s)
269 , edge_hidden = Nothing
270 , edge_target = cs (show t)
271 , edge_weight = weight
272 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
273 , edge_id = cs (show i)
274 }
275 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
276 , s /= t
277 , weight > 0
278 ]
279
280 community_id_by_node_id = Map.fromList
281 $ map nodeId2comId partitions
282
283 labels = Map.toList labels'
284
285
286 ------------------------------------------------------------------------
287
288 data Layout = KamadaKawai | ACP | ForceAtlas
289
290
291 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
292 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
293 where
294 (x,y) = f i
295
296
297 -- | ACP
298 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
299 setCoord l labels m (n,node) = node { node_x_coord = x
300 , node_y_coord = y
301 }
302 where
303 (x,y) = getCoord l labels m n
304
305
306 getCoord :: Ord a
307 => Layout
308 -> [(a, Int)]
309 -> Map (Int, Int) Double
310 -> Int
311 -> (Double, Double)
312 getCoord KamadaKawai _ _m _n = undefined -- layout m n
313
314 getCoord ForceAtlas _ _ n = (sin d, cos d)
315 where
316 d = fromIntegral n
317
318 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
319 $ Map.lookup n
320 $ pcaReduceTo (Dimension 2)
321 $ mapArray labels m
322 where
323 to2d :: Vec.Vector Double -> (Double, Double)
324 to2d v = (x',y')
325 where
326 ds = take 2 $ Vec.toList v
327 x' = head' "to2d" ds
328 y' = last' "to2d" ds
329
330 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
331 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
332 where
333 ns = map snd items
334
335 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
336 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
337 ------------------------------------------------------------------------
338
339 -- | KamadaKawai Layout
340 -- TODO TEST: check labels, nodeId and coordinates
341 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
342 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
343 where
344 coord :: (Map Int (Double,Double))
345 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
346 --p = Layout.defaultLGL
347 p = Layout.kamadaKawai
348 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
349
350 -----------------------------------------------------------------------------
351 -- MISC Tools
352 cooc2graph'' :: Ord t => Similarity
353 -> Double
354 -> Map (t, t) Int
355 -> Map (Index, Index) Double
356 cooc2graph'' distance threshold myCooc = neighbourMap
357 where
358 (ti, _) = createIndices myCooc
359 myCooc' = toIndex ti myCooc
360 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
361 distanceMat = measure distance matCooc
362 neighbourMap = filterByNeighbours threshold
363 $ mat2map distanceMat
364
365 -- Quentin
366 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
367 filterByNeighbours threshold distanceMap = filteredMap
368 where
369 indexes :: [Index]
370 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
371 filteredMap :: Map (Index, Index) Double
372 filteredMap = Map.fromList
373 $ List.concat
374 $ map (\idx ->
375 let selected = List.reverse
376 $ List.sortOn snd
377 $ Map.toList
378 $ Map.filter (> 0)
379 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
380 in List.take (round threshold) selected
381 ) indexes