]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Bridgeness.hs
Merge branch 'dev-phylo' of https://gitlab.iscpif.fr/gargantext/haskell-gargantext...
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Bridgeness.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Bridgeness
3 Description : Bridgeness filter
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Let be a graph Bridgeness filters inter-communities links in two ways.
11 If the partitions are known, filtering is uniform to expose the communities clearly for the beginners.
12 But
13
14
15 uniformly
16 filters inter-communities links.
17
18 TODO use Map LouvainNodeId (Map LouvainNodeId)
19 -}
20
21 {-# LANGUAGE BangPatterns #-}
22
23 module Gargantext.Core.Viz.Graph.Bridgeness -- (bridgeness)
24 where
25
26 import Data.Map.Strict (Map, fromListWith, lookup, toList, mapWithKey, elems)
27 import Data.Maybe (catMaybes)
28 import Data.Ord (Down(..))
29 import Data.Set (Set)
30 import Data.Tuple.Extra (swap)
31 import Debug.Trace (trace)
32 import Gargantext.Core.Methods.Similarities (Similarity(..))
33 import Gargantext.Prelude
34 import Prelude (pi)
35 import Graph.Types (ClusterNode(..))
36 import qualified Data.List as List
37 import qualified Data.Map.Strict as Map
38 import qualified Data.Set as Set
39 import qualified Data.Tuple.Extra as Tuple
40 import qualified Data.IntMap as Dico
41 ----------------------------------------------------------------------
42
43 type Partitions = Map (Int, Int) Double -> IO [ClusterNode]
44 type Partitions' = Map (Int, Int) Double -> IO [Set NodeId]
45 ----------------------------------------------------------------------
46 nodeId2comId :: ClusterNode -> (NodeId, CommunityId)
47 nodeId2comId (ClusterNode i1 i2) = (i1, i2)
48
49 type NodeId = Int
50 type CommunityId = Int
51
52 ----------------------------------------------------------------------
53 -- recursiveClustering : to get more granularity of a given clustering
54 -- tested with spinglass clustering only (WIP)
55 recursiveClustering' :: Partitions' -> Map (Int, Int) Double -> IO [[Set NodeId]]
56 recursiveClustering' f mp = do
57 let
58 n :: Double
59 n = fromIntegral $ Set.size
60 $ Set.unions $ List.concat
61 $ map (\(k1,k2) -> map Set.singleton [k1, k2])
62 $ Map.keys mp
63
64 t :: Int
65 t = round $ 2 * n / sqrt n
66
67 ss <- f mp
68 mapM (\s -> if Set.size s > t then f (removeNodes s mp) else pure [s]) ss
69
70 ----------------------------------------------------------------------
71 recursiveClustering :: Partitions -> Map (Int, Int) Double -> IO [ClusterNode]
72 recursiveClustering f mp = do
73 let
74 n :: Double
75 n = fromIntegral $ Set.size
76 $ Set.unions $ List.concat
77 $ map (\(k1,k2) -> map Set.singleton [k1, k2])
78 $ Map.keys mp
79
80 t :: Int
81 t = round $ 2 * n / sqrt n
82
83 (toSplit,others) <- List.span (\a -> Set.size a > t) <$> clusterNodes2sets <$> f mp
84 cls' <- mapM f $ map (\s -> removeNodes s mp) toSplit
85 pure $ setNodes2clusterNodes $ others <> (List.concat $ map clusterNodes2sets cls')
86
87
88 ----------------------------------------------------------------------
89 setNodes2clusterNodes :: [Set NodeId] -> [ClusterNode]
90 setNodes2clusterNodes ns = List.concat $ map (\(n,ns') -> toCluster n ns') $ zip [1..] ns
91 where
92 toCluster :: CommunityId -> Set NodeId -> [ClusterNode]
93 toCluster cId setNodeId = map (\n -> ClusterNode n cId) (Set.toList setNodeId)
94
95 clusterNodes2map :: [ClusterNode] -> Map NodeId Int
96 clusterNodes2map = Map.fromList . map (\(ClusterNode nId cId) -> (nId, cId))
97
98 removeNodes :: Set NodeId
99 -> Map (NodeId, NodeId) Double
100 -> Map (NodeId, NodeId) Double
101 removeNodes s = Map.filterWithKey (\(n1,n2) _v -> Set.member n1 s && Set.member n2 s)
102
103
104 clusterNodes2sets :: [ClusterNode] -> [Set NodeId]
105 clusterNodes2sets = Dico.elems
106 . Dico.fromListWith (<>)
107 . (map ((Tuple.second Set.singleton) . swap . nodeId2comId))
108
109 ----------------------------------------------------------------------
110 data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode]
111 , bridgeness_filter :: Double
112 }
113 | Bridgeness_Advanced { bridgeness_similarity :: Similarity
114 , bridgness_confluence :: Confluence
115 }
116 | Bridgeness_Recursive { br_partitions :: [[Set NodeId]]
117 , br_filter :: Double
118 }
119
120
121 type Confluence = Map (NodeId, NodeId) Double
122
123 -- Filter Links between the Clusters
124 -- Links: Map (NodeId, NodeId) Double
125 -- List of Clusters: [Set NodeId]
126 bridgeness :: Bridgeness
127 -> Map (NodeId, NodeId) Double
128 -> Map (NodeId, NodeId) Double
129 bridgeness (Bridgeness_Recursive sn f) m =
130 Map.unions $ [linksBetween] <> map (\s -> bridgeness (Bridgeness_Basic (setNodes2clusterNodes s) (pi*f)) m') sn
131 where
132 (linksBetween, m') = Map.partitionWithKey (\(n1,n2) _v -> Map.lookup n1 mapNodeIdClusterId
133 /= Map.lookup n2 mapNodeIdClusterId
134 ) $ bridgeness (Bridgeness_Basic clusters f) m
135 clusters = setNodes2clusterNodes (map Set.unions sn)
136 mapNodeIdClusterId = clusterNodes2map clusters
137
138
139 bridgeness (Bridgeness_Advanced sim c) m = Map.fromList
140 $ List.filter (\x -> if sim == Conditional then snd x > 0.2 else snd x > 0.02)
141 $ map (\(ks, (v1,_v2)) -> (ks,v1))
142 -- $ List.take (if sim == Conditional then 2*n else 3*n)
143 -- $ List.sortOn (Down . (snd . snd))
144 $ Map.toList
145 -- $ trace ("bridgeness3 m c" <> show (m,c))
146 $ Map.intersectionWithKey
147 (\k v1 v2 -> trace ("intersectionWithKey " <> (show (k, v1, v2))) (v1, v2)) m c
148
149
150 bridgeness (Bridgeness_Basic ns b) m = Map.fromList
151 $ List.concat
152 $ Map.elems
153 $ filterComs (round b)
154 $ groupEdges (Map.fromList $ map nodeId2comId ns) m
155
156
157 groupEdges :: (Ord comId, Ord nodeId)
158 => Map nodeId comId
159 -> Map (nodeId, nodeId) value
160 -> Map (comId, comId) [((nodeId, nodeId), value)]
161 groupEdges m = fromListWith (<>)
162 . catMaybes
163 . map (\((n1,n2), d)
164 -> let
165 n1n2_m = (,) <$> lookup n1 m <*> lookup n2 m
166 n1n2_d = Just [((n1,n2),d)]
167 in (,) <$> n1n2_m <*> n1n2_d
168 )
169 . toList
170
171 filterComs :: (Ord n1, Eq n2)
172 => Int
173 -> Map (n2, n2) [(a3, n1)]
174 -> Map (n2, n2) [(a3, n1)]
175 filterComs b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m
176 where
177 filter' (c1,c2) a
178 | c1 == c2 = a
179 -- TODO use n here
180 | otherwise = take (b * 2*n) $ List.sortOn (Down . snd) a
181 where
182 n :: Int
183 n = round $ 100 * a' / t
184 a'= fromIntegral $ length a
185 t :: Double
186 t = fromIntegral $ length $ List.concat $ elems m
187
188 --------------------------------------------------------------
189 -- Utils
190 {--
191 map2intMap :: Map (Int, Int) a -> IntMap (IntMap a)
192 map2intMap m = IntMap.fromListWith (<>)
193 $ map (\((k1,k2), v) -> if k1 < k2
194 then (k1, IntMap.singleton k2 v)
195 else (k2, IntMap.singleton k1 v)
196 )
197 $ Map.toList m
198
199 look :: (Int,Int) -> IntMap (IntMap a) -> Maybe a
200 look (k1,k2) m = if k1 < k2
201 then case (IntMap.lookup k1 m) of
202 Just m' -> IntMap.lookup k2 m'
203 _ -> Nothing
204 else look (k2,k1) m
205
206
207 {-
208 Compute the median of a list
209 From: https://hackage.haskell.org/package/dsp-0.2.5.1/docs/src/Numeric.Statistics.Median.html
210 Compute the center of the list in a more lazy manner
211 and thus halves memory requirement.
212 -}
213
214 median :: (Ord a, Fractional a) => [a] -> a
215 median [] = panic "medianFast: empty list has no median"
216 median zs =
217 let recurse (x0:_) (_:[]) = x0
218 recurse (x0:x1:_) (_:_:[]) = (x0+x1)/2
219 recurse (_:xs) (_:_:ys) = recurse xs ys
220 recurse _ _ =
221 panic "median: this error cannot occur in the way 'recurse' is called"
222 in recurse zs zs
223
224 -}