{-| Module : Gargantext.Core.Viz.Graph.Bridgeness Description : Bridgeness filter Copyright : (c) CNRS, 2017-Present License : AGPL + CECILL v3 Maintainer : team@gargantext.org Stability : experimental Portability : POSIX Let be a graph Bridgeness filters inter-communities links in two ways. If the partitions are known, filtering is uniform to expose the communities clearly for the beginners. But uniformly filters inter-communities links. TODO use Map LouvainNodeId (Map LouvainNodeId) -} {-# LANGUAGE BangPatterns #-} module Gargantext.Core.Viz.Graph.Bridgeness -- (bridgeness) where import Data.Map.Strict (Map, fromListWith, lookup, toList, mapWithKey, elems) import Data.Maybe (catMaybes) import Data.Ord (Down(..)) import Data.Set (Set) import Data.Tuple.Extra (swap) import Debug.Trace (trace) import Gargantext.Core.Methods.Similarities (Similarity(..)) import Gargantext.Prelude import Prelude (pi) import Graph.Types (ClusterNode(..)) import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Set as Set import qualified Data.Tuple.Extra as Tuple import qualified Data.IntMap as Dico ---------------------------------------------------------------------- type Partitions = Map (Int, Int) Double -> IO [ClusterNode] type Partitions' = Map (Int, Int) Double -> IO [Set NodeId] ---------------------------------------------------------------------- nodeId2comId :: ClusterNode -> (NodeId, CommunityId) nodeId2comId (ClusterNode i1 i2) = (i1, i2) type NodeId = Int type CommunityId = Int ---------------------------------------------------------------------- -- recursiveClustering : to get more granularity of a given clustering -- tested with spinglass clustering only (WIP) recursiveClustering' :: Partitions' -> Map (Int, Int) Double -> IO [[Set NodeId]] recursiveClustering' f mp = do let n :: Double n = fromIntegral $ Set.size $ Set.unions $ List.concat $ map (\(k1,k2) -> map Set.singleton [k1, k2]) $ Map.keys mp t :: Int t = round $ 2 * n / sqrt n ss <- f mp mapM (\s -> if Set.size s > t then f (removeNodes s mp) else pure [s]) ss ---------------------------------------------------------------------- recursiveClustering :: Partitions -> Map (Int, Int) Double -> IO [ClusterNode] recursiveClustering f mp = do let n :: Double n = fromIntegral $ Set.size $ Set.unions $ List.concat $ map (\(k1,k2) -> map Set.singleton [k1, k2]) $ Map.keys mp t :: Int t = round $ 2 * n / sqrt n (toSplit,others) <- List.span (\a -> Set.size a > t) <$> clusterNodes2sets <$> f mp cls' <- mapM f $ map (\s -> removeNodes s mp) toSplit pure $ setNodes2clusterNodes $ others <> (List.concat $ map clusterNodes2sets cls') ---------------------------------------------------------------------- setNodes2clusterNodes :: [Set NodeId] -> [ClusterNode] setNodes2clusterNodes ns = List.concat $ map (\(n,ns') -> toCluster n ns') $ zip [1..] ns where toCluster :: CommunityId -> Set NodeId -> [ClusterNode] toCluster cId setNodeId = map (\n -> ClusterNode n cId) (Set.toList setNodeId) clusterNodes2map :: [ClusterNode] -> Map NodeId Int clusterNodes2map = Map.fromList . map (\(ClusterNode nId cId) -> (nId, cId)) removeNodes :: Set NodeId -> Map (NodeId, NodeId) Double -> Map (NodeId, NodeId) Double removeNodes s = Map.filterWithKey (\(n1,n2) _v -> Set.member n1 s && Set.member n2 s) clusterNodes2sets :: [ClusterNode] -> [Set NodeId] clusterNodes2sets = Dico.elems . Dico.fromListWith (<>) . (map ((Tuple.second Set.singleton) . swap . nodeId2comId)) ---------------------------------------------------------------------- data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode] , bridgeness_filter :: Double } | Bridgeness_Advanced { bridgeness_similarity :: Similarity , bridgness_confluence :: Confluence } | Bridgeness_Recursive { br_partitions :: [[Set NodeId]] , br_filter :: Double , br_similarity :: Similarity } type Confluence = Map (NodeId, NodeId) Double -- Filter Links between the Clusters -- Links: Map (NodeId, NodeId) Double -- List of Clusters: [Set NodeId] bridgeness :: Bridgeness -> Map (NodeId, NodeId) Double -> Map (NodeId, NodeId) Double bridgeness (Bridgeness_Recursive sn f sim) m = Map.unions $ [linksBetween] <> map (\s -> bridgeness (Bridgeness_Basic (setNodes2clusterNodes s) (if sim == Conditional then pi*f else f)) m') sn where (linksBetween, m') = Map.partitionWithKey (\(n1,n2) _v -> Map.lookup n1 mapNodeIdClusterId /= Map.lookup n2 mapNodeIdClusterId ) $ bridgeness (Bridgeness_Basic clusters f) m clusters = setNodes2clusterNodes (map Set.unions sn) mapNodeIdClusterId = clusterNodes2map clusters bridgeness (Bridgeness_Advanced sim c) m = Map.fromList $ List.filter (\x -> if sim == Conditional then snd x > 0.2 else snd x > 0.02) $ map (\(ks, (v1,_v2)) -> (ks,v1)) $ Map.toList $ Map.intersectionWithKey (\k v1 v2 -> trace ("intersectionWithKey " <> (show (k, v1, v2))) (v1, v2)) m c bridgeness (Bridgeness_Basic ns b) m = Map.fromList $ List.concat $ Map.elems $ filterComs (round b) $ groupEdges (Map.fromList $ map nodeId2comId ns) m groupEdges :: (Ord comId, Ord nodeId) => Map nodeId comId -> Map (nodeId, nodeId) value -> Map (comId, comId) [((nodeId, nodeId), value)] groupEdges m = fromListWith (<>) . catMaybes . map (\((n1,n2), d) -> let n1n2_m = (,) <$> lookup n1 m <*> lookup n2 m n1n2_d = Just [((n1,n2),d)] in (,) <$> n1n2_m <*> n1n2_d ) . toList filterComs :: (Ord n1, Eq n2) => Int -> Map (n2, n2) [(a3, n1)] -> Map (n2, n2) [(a3, n1)] filterComs b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m where filter' (c1,c2) a | c1 == c2 = a -- TODO use n here | otherwise = take (b * 2*n) $ List.sortOn (Down . snd) a where n :: Int n = round $ 100 * a' / t a'= fromIntegral $ length a t :: Double t = fromIntegral $ length $ List.concat $ elems m -------------------------------------------------------------- -- Utils {-- map2intMap :: Map (Int, Int) a -> IntMap (IntMap a) map2intMap m = IntMap.fromListWith (<>) $ map (\((k1,k2), v) -> if k1 < k2 then (k1, IntMap.singleton k2 v) else (k2, IntMap.singleton k1 v) ) $ Map.toList m look :: (Int,Int) -> IntMap (IntMap a) -> Maybe a look (k1,k2) m = if k1 < k2 then case (IntMap.lookup k1 m) of Just m' -> IntMap.lookup k2 m' _ -> Nothing else look (k2,k1) m {- Compute the median of a list From: https://hackage.haskell.org/package/dsp-0.2.5.1/docs/src/Numeric.Statistics.Median.html Compute the center of the list in a more lazy manner and thus halves memory requirement. -} median :: (Ord a, Fractional a) => [a] -> a median [] = panic "medianFast: empty list has no median" median zs = let recurse (x0:_) (_:[]) = x0 recurse (x0:x1:_) (_:_:[]) = (x0+x1)/2 recurse (_:xs) (_:_:ys) = recurse xs ys recurse _ _ = panic "median: this error cannot occur in the way 'recurse' is called" in recurse zs zs -}