2 Module : Gargantext.Graph.Distances.Conditional
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Motivation and definition of the @Conditional@ distance.
13 {-# LANGUAGE BangPatterns #-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE FlexibleContexts #-}
16 {-# LANGUAGE Strict #-}
17 module Gargantext.Graph.Distances.Conditional
20 import Data.Matrix hiding (identity)
21 import Data.String.Conversions (ConvertibleStrings(..))
23 import Data.List (concat, sortOn)
24 import qualified Data.List as L
27 import qualified Data.Map as M
30 import qualified Data.Set as S
32 import Data.Vector (Vector)
33 import qualified Data.Vector as V
35 import Gargantext.Prelude
36 import Gargantext.Graph.Utils
38 ------------------------------------------------------------------------
39 ------------------------------------------------------------------------
-- | Entry point flagged as an optimisation issue.
--
-- It is a thin alias: the input matrix is handed unchanged to 'proba'
-- with the 'Col' axis, and the result of that call is returned as is.
toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
toBeOptimized = proba Col
45 ------------------------------------------------------------------------
-- | Compute the probability from an axis.
--
-- Every element visited by 'mapOn' is divided by the sum of the vector
-- @axis a c m@, where @c@ is the 'AxisId' that 'mapOn' passes to the
-- callback:
--
-- > x' = x / (sum Col x)
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a normalise m
  where
    -- The sum is always read from the input matrix @m@, not from the
    -- partially rewritten matrix that 'mapOn' threads through its fold.
    normalise c x = x / total c
    total c = V.sum (axis a c m)
-- | Apply a callback along every index of the chosen axis.
--
-- The indices @1 .. nOf a m@ are visited with a strict left fold; at each
-- step the matrix accumulated so far is rewritten by 'mapOnly' for the
-- current index, and the final accumulator is returned.
mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m =
  V.foldl' (\acc axisId -> mapOnly a f axisId acc) m (V.enumFromTo 1 (nOf a m))
-- | Single-index building block of 'mapOn': 'mapOn' calls it once per axis
-- index, passing the axis, the callback, the current index and the matrix
-- accumulated so far, and uses the returned matrix as the next accumulator.
--
-- NOTE(review): only the type signature is visible in this excerpt; the
-- defining equations (and therefore what the 'AxisId' given to the callback
-- actually denotes) must be checked against the full file.
58 mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
-- | Apply a plain element function across a matrix.
--
-- Implemented as a 'mapOn' over the 'Col' axis whose callback discards the
-- 'AxisId' it receives and simply applies @f@.
mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col ignoringAxis m
  where
    ignoringAxis _ = f
66 ---------------------------------------------------------------
-- | Compute a distance from an axis.
--
-- Every element visited by 'mapOn' is replaced by the sum of the vector
-- @axis a c m@ minus the element itself, @c@ being the 'AxisId' supplied
-- by 'mapOn':
--
-- > xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
            => Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a complement m
  where
    -- Sums are taken over the input matrix @m@, not over the matrix being
    -- rebuilt by 'mapOn'.
    complement c x = V.sum (axis a c m) - x
72 ---------------------------------------------------------------
73 ---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores.
--
-- The two operands are combined with @op@, then the result is rescaled
-- through 'mapAll' by dividing by @2*n - 1@, where @n@ is @nOf Col xs@.
--
-- NOTE(review): the reference formulas quoted in the comments of
-- 'conditional' divide by @2 * (x.shape[0] - 1)@, which is not the same
-- value as @2*n - 1@ — confirm which divisor is intended.
opWith :: (Fractional a1, Num a1)
       => (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys =
  let n = fromIntegral (nOf Col xs)
      rescale x = x / (2 * n - 1)
  in mapAll rescale (op xs ys)
80 ---------------------------------------------------------------
83 -------------------------------------------------------
-- | Conditional distance of a matrix.
--
-- Visible pipeline: a reduced matrix @m'@ is built from a selection of
-- node indices ('nodes_kept'), a threshold is derived from @m'@, and @m'@
-- is then passed element by element through @filter'@ with that threshold.
--
-- NOTE(review): several lines of this definition are not visible in this
-- excerpt (the @where@ keyword, the bindings of @x'@, @k@ and @k'@, one
-- argument of @matrix@, one stage of the 'nodes_kept' pipeline and the
-- alternatives of the final @case@); the comments below only describe
-- what the visible lines establish.
84 conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
85 conditional m = filter (threshold m') m'
87 ------------------------------------------------------------------------
89 -- x' = x / (sum Col x)
92 ------------------------------------------------------------------------
93 -- xs = (sum Col x') - x'
94 xs = distFromSum Col x'
95 -- ys = (sum Row x') - x'
96 ys = distFromSum Row x'
98 ------------------------------------------------------------------------
99 -- | Top included or excluded
100 ie = opWith (+) xs ys
101 -- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
-- NOTE(review): 'opWith' divides by (2*n - 1), not by 2 * (n - 1) as in
-- the reference formula above — confirm which one is intended.
103 -- | Top specific or generic
104 sg = opWith (-) xs ys
105 -- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
-- Node selection: the index pairs coming out of the pipeline below are
-- flattened into a set (removing duplicates), and the first k' elements of
-- the ascending 'S.toList' are kept.
108 nodes_kept = take k' $ S.toList
109 $ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
111 $ nodes_included k <> nodes_specific k
-- NOTE(review): 'sortOn snd' sorts in ascending order, so 'take' keeps the
-- entries with the smallest scores; the "Top ..." labels above suggest the
-- largest may have been intended — confirm.
113 nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
114 nodes_specific m = take m $ sortOn snd $ toListsWithIndex sg
-- Insert every element of the list @as@ into the set @s@.
115 insert as s = foldl' (\s' a -> S.insert a s') s as
-- Maps a position 1..length nodes_kept to the kept node index.
119 dico_nodes :: Map Int Int
120 dico_nodes = M.fromList $ zip [1..] nodes_kept
-- Reverse mapping (kept node index to position).
-- NOTE(review): not referenced by any visible line — possibly unused.
121 dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
-- Reduced matrix: entry (i,j) is the element of x' found at the node
-- indices that 'dico_nodes' associates with positions i and j.
123 m' = matrix (length nodes_kept)
125 (\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
-- Threshold: the smallest of the per-column maxima of the given matrix.
127 threshold m = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m) (V.enumFromTo 1 (nOf Col m))
-- Apply filter' with threshold t to the elements of m via 'mapAll'.
129 filter t m = mapAll (\x -> filter' t x) m
-- Branches on whether the element reaches the threshold (x >= t); the
-- alternatives themselves are not visible in this excerpt.
131 filter' t x = case (x >= t) of
135 ------------------------------------------------------------------------