]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Bridgeness.hs
[FIX] Order 2 regression and split of clustering
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Bridgeness.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Bridgeness
3 Description : Bridgeness filter
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Let be a graph Bridgeness filters inter-communities links in two ways.
11 If the partitions are known, filtering is uniform to expose the communities clearly for the beginners.
12 But
13
14
15 uniformly
16 filters inter-communities links.
17
18 TODO use Map LouvainNodeId (Map LouvainNodeId)
19 -}
20
21 {-# LANGUAGE BangPatterns #-}
22
23 module Gargantext.Core.Viz.Graph.Bridgeness -- (bridgeness)
24 where
25
26 import Data.Map.Strict (Map, fromListWith, lookup, toList, mapWithKey, elems)
27 import Data.Maybe (catMaybes)
28 import Data.Ord (Down(..))
29 import Data.Set (Set)
30 import Data.Tuple.Extra (swap)
31 import Debug.Trace (trace)
32 import Gargantext.Core.Methods.Similarities (Similarity(..))
33 import Gargantext.Prelude
34 import Graph.Types (ClusterNode(..))
35 import qualified Data.List as List
36 import qualified Data.Map.Strict as Map
37 import qualified Data.Set as Set
38 import qualified Data.Tuple.Extra as Tuple
39 import qualified Data.IntMap as Dico
40 ----------------------------------------------------------------------
41
42 type Partitions = Map (Int, Int) Double -> IO [ClusterNode]
43 ----------------------------------------------------------------------
44 nodeId2comId :: ClusterNode -> (NodeId, CommunityId)
45 nodeId2comId (ClusterNode i1 i2) = (i1, i2)
46
47 type NodeId = Int
48 type CommunityId = Int
49
50 ----------------------------------------------------------------------
51 ----------------------------------------------------------------------
52 -- recursiveClustering : get get more granularity of a given clustering
53 -- tested with spinglass clustering only (WIP)
54 recursiveClustering :: Partitions -> Map (Int, Int) Double -> IO [ClusterNode]
55 recursiveClustering f mp = do
56 let
57 n :: Double
58 n = fromIntegral $ Set.size
59 $ Set.unions $ List.concat
60 $ map (\(k1,k2) -> map Set.singleton [k1, k2])
61 $ Map.keys mp
62
63 t :: Int
64 t = round $ (n / 2) * (sqrt n) / 100
65
66 (toSplit,others) <- List.span (\a -> Set.size a > t) <$> clusterNodes2sets <$> f mp
67 cls' <- mapM f $ map (\s -> removeNodes s mp) toSplit
68 pure $ setNodes2clusterNodes $ others <> (List.concat $ map clusterNodes2sets cls')
69
70 setNodes2clusterNodes :: [Set NodeId] -> [ClusterNode]
71 setNodes2clusterNodes ns = List.concat $ map (\(n,ns') -> toCluster n ns') $ zip [1..] ns
72 where
73 toCluster :: CommunityId -> Set NodeId -> [ClusterNode]
74 toCluster cId setNodeId = map (\n -> ClusterNode n cId) (Set.toList setNodeId)
75
76 removeNodes :: Set NodeId
77 -> Map (NodeId, NodeId) Double
78 -> Map (NodeId, NodeId) Double
79 removeNodes s = Map.filterWithKey (\(n1,n2) _v -> Set.member n1 s && Set.member n2 s)
80
81
82 clusterNodes2sets :: [ClusterNode] -> [Set NodeId]
83 clusterNodes2sets = Dico.elems
84 . Dico.fromListWith (<>)
85 . (map ((Tuple.second Set.singleton) . swap . nodeId2comId))
86
87 ----------------------------------------------------------------------
88 ----------------------------------------------------------------------
89 data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode]
90 , bridgeness_filter :: Double
91 }
92 | Bridgeness_Advanced { bridgeness_similarity :: Similarity
93 , bridgness_confluence :: Confluence
94 }
95
96 type Confluence = Map (NodeId, NodeId) Double
97
98
99 bridgeness :: Bridgeness
100 -> Map (NodeId, NodeId) Double
101 -> Map (NodeId, NodeId) Double
102 bridgeness (Bridgeness_Advanced sim c) m = Map.fromList
103 $ List.filter (\x -> if sim == Conditional then snd x > 0.2 else snd x > 0.02)
104 $ map (\(ks, (v1,_v2)) -> (ks,v1))
105 -- $ List.take (if sim == Conditional then 2*n else 3*n)
106 -- $ List.sortOn (Down . (snd . snd))
107 $ Map.toList
108 $ trace ("bridgeness3 m c" <> show (m,c))
109 $ Map.intersectionWithKey
110 (\k v1 v2 -> trace ("intersectionWithKey " <> (show (k, v1, v2))) (v1, v2)) m c
111
112 {-
113 where
114 !m' = Map.toList m
115 n :: Int
116 !n = trace ("bridgeness m size: " <> (show $ List.length m'))
117 $ round
118 $ (fromIntegral $ List.length m') / (log $ fromIntegral nodesNumber :: Double)
119
120 nodesNumber :: Int
121 nodesNumber = Set.size $ Set.fromList $ as <> bs
122 where
123 (as, bs) = List.unzip $ Map.keys m
124 -}
125
126
127 bridgeness (Bridgeness_Basic ns b) m = Map.fromList
128 $ List.concat
129 $ Map.elems
130 $ filterComs b
131 $ groupEdges (Map.fromList $ map nodeId2comId ns) m
132
133 groupEdges :: (Ord a, Ord b1)
134 => Map b1 a
135 -> Map (b1, b1) b2
136 -> Map (a, a) [((b1, b1), b2)]
137 groupEdges m = fromListWith (<>)
138 . catMaybes
139 . map (\((n1,n2), d)
140 -> let
141 n1n2_m = (,) <$> lookup n1 m <*> lookup n2 m
142 n1n2_d = Just [((n1,n2),d)]
143 in (,) <$> n1n2_m <*> n1n2_d
144 )
145 . toList
146
147 -- | TODO : sortOn Confluence
148 filterComs :: (Ord n1, Eq n2)
149 => p
150 -> Map (n2, n2) [(a3, n1)]
151 -> Map (n2, n2) [(a3, n1)]
152 filterComs _b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m
153 where
154 filter' (c1,c2) a
155 | c1 == c2 = a
156 -- TODO use n here
157 | otherwise = take (2*n) $ List.sortOn (Down . snd) a
158 where
159 n :: Int
160 n = round $ 100 * a' / t
161 a'= fromIntegral $ length a
162 t :: Double
163 t = fromIntegral $ length $ List.concat $ elems m
164
165 --------------------------------------------------------------
166 -- Utils
167 {--
168 map2intMap :: Map (Int, Int) a -> IntMap (IntMap a)
169 map2intMap m = IntMap.fromListWith (<>)
170 $ map (\((k1,k2), v) -> if k1 < k2
171 then (k1, IntMap.singleton k2 v)
172 else (k2, IntMap.singleton k1 v)
173 )
174 $ Map.toList m
175
176 look :: (Int,Int) -> IntMap (IntMap a) -> Maybe a
177 look (k1,k2) m = if k1 < k2
178 then case (IntMap.lookup k1 m) of
179 Just m' -> IntMap.lookup k2 m'
180 _ -> Nothing
181 else look (k2,k1) m
182
183
184 {-
185 Compute the median of a list
186 From: https://hackage.haskell.org/package/dsp-0.2.5.1/docs/src/Numeric.Statistics.Median.html
187 Compute the center of the list in a more lazy manner
188 and thus halves memory requirement.
189 -}
190
191 median :: (Ord a, Fractional a) => [a] -> a
192 median [] = panic "medianFast: empty list has no median"
193 median zs =
194 let recurse (x0:_) (_:[]) = x0
195 recurse (x0:x1:_) (_:_:[]) = (x0+x1)/2
196 recurse (_:xs) (_:_:ys) = recurse xs ys
197 recurse _ _ =
198 panic "median: this error cannot occur in the way 'recurse' is called"
199 in recurse zs zs
200
201 -}