2 Module : Gargantext.Graph.Distances.Conditional
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Motivation and definition of the @Conditional@ distance.
13 {-# LANGUAGE BangPatterns #-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE FlexibleContexts #-}
16 {-# LANGUAGE Strict #-}
17 module Gargantext.Graph.Distances.Conditional
20 import Data.Matrix hiding (identity)
21 import Data.String.Conversions (ConvertibleStrings(..))
23 import Data.List (concat, sortOn)
24 import qualified Data.List as L
27 import qualified Data.Map as M
30 import qualified Data.Set as S
32 import Data.Vector (Vector)
33 import qualified Data.Vector as V
35 import Gargantext.Prelude
36 import Gargantext.Graph.Utils
37 ------------------------------------------------------------------------
38 -------------------------------------------------------
-- Conditional distance over a matrix: the visible equation returns the
-- where-bound value x'.
-- NOTE(review): several lines of this definition are not visible in this
-- excerpt (the `where` keyword, the bindings of x', ie, sg, k and k', one
-- argument of `matrix`, and the branches of the final `case`). The notes below
-- describe only what the visible lines establish.
39 conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
-- Only x' is returned; the thresholded variant `filter (threshold m') m'` is
-- commented out, so in the visible lines nodes_kept, dico_nodes, m', threshold
-- and filter do not contribute to the result.
-- NOTE(review): the file enables the `Strict` extension, so these unused
-- where-bindings are presumably still evaluated (to WHNF) on each call;
-- confirm whether that cost is intended.
40 conditional m = x' -- filter (threshold m') m'
42 ------------------------------------------------------------------------
-- Reference formula for x'. Its binding is not visible here; presumably
-- `proba Col m` (TODO confirm).
44 -- x' = x / (sum Col x)
47 ------------------------------------------------------------------------
48 -- xs = (sum Col x') - x'
49 xs = distFromSum Col x'
50 -- ys = (sum Row x') - x'
51 ys = distFromSum Row x'
53 ------------------------------------------------------------------------
54 -- | Top included or excluded
-- NOTE(review): the binding of ie is not visible; presumably `opWith (+) xs ys`
-- (TODO confirm).
56 -- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
58 -- | Top specific or generic
-- NOTE(review): the binding of sg is not visible; presumably `opWith (-) xs ys`
-- (TODO confirm).
60 -- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
-- Collects the indices named by the selected entries into a Set (which removes
-- duplicates), then keeps the first k' of them in the ascending order given by
-- S.toList. k and k' are bound on lines not visible here.
63 nodes_kept = take k' $ S.toList
64 $ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
66 $ nodes_included k <> nodes_specific k
-- `sortOn snd` sorts ascending, so each helper keeps the entries with the
-- smallest second component.
-- NOTE(review): confirm that "top" is meant to be the smallest scores.
68 nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
-- The parameter `m` here shadows the matrix argument of `conditional`.
69 nodes_specific m = take m $ sortOn snd $ toListsWithIndex sg
-- Adds every element of the list `as` to the set `s`.
70 insert as s = foldl' (\s' a -> S.insert a s') s as
-- dico_nodes maps a compact 1-based position to a kept node index;
-- dico_nodes_rev is the inverse mapping (not used in the visible lines).
74 dico_nodes :: Map Int Int
75 dico_nodes = M.fromList $ zip [1..] nodes_kept
76 dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
-- Sub-matrix of x' restricted to the kept nodes: cell (i,j) reads x' at the
-- original indices looked up in dico_nodes. `(M.!)` raises an error on a
-- missing key; the keys are 1..length nodes_kept, so lookups succeed as long as
-- the (not visible) second dimension is also `length nodes_kept`.
78 m' = matrix (length nodes_kept)
80 (\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
-- Smallest of the per-column maxima of the given matrix (the parameter `m`
-- shadows the outer argument). V.minimum / V.maximum raise an error on an
-- empty vector.
82 threshold m = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m) (V.enumFromTo 1 (nOf Col m))
-- Applies filter' with threshold t to the matrix through mapAll (presumably an
-- element-wise map from Gargantext.Graph.Utils).
84 filter t m = mapAll (\x -> filter' t x) m
-- Branches on whether the value reaches the threshold; the two alternatives
-- are not visible in this excerpt.
86 filter' t x = case (x >= t) of
90 ------------------------------------------------------------------------
91 ------------------------------------------------------------------------
-- | Compute the probability from axis.
--
-- Every cell @x@ is replaced by @x / V.sum (axis a c m)@, where @c@ is the
-- index that 'mapOn' hands over together with the cell. With the 'Col' axis
-- this is the reference formula @x' = x / (sum Col x)@.
--
-- NOTE(review): nothing guards against a slice whose sum is zero; the
-- division is performed as-is.
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a normalise m
  where
    -- Share of one cell in the total of the slice selected by its index.
    normalise c x = x / sliceTotal c
    -- Sum of the slice of the input matrix at index c along the axis.
    sliceTotal c = V.sum (axis a c m)
98 ---------------------------------------------------------------
-- | Compute a distance from axis.
--
-- Every cell @x@ is replaced by @V.sum (axis a c m) - x@, where @c@ is the
-- index that 'mapOn' hands over together with the cell. With the 'Col' axis
-- this is the reference formula @xs = (sum Col x') - x'@.
distFromSum :: (Num a, Fractional a)
            => Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a restOfSlice m
  where
    -- What is left of the slice total once the cell itself is removed.
    restOfSlice c x = sliceTotal c - x
    -- Sum of the slice of the input matrix at index c along the axis.
    sliceTotal c = V.sum (axis a c m)
104 ---------------------------------------------------------------
105 ---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores.
--
-- Combines @xs@ and @ys@ with the operator @op@, then divides every value of
-- the result by @2 * (n - 1)@ through 'mapAll' (presumably an element-wise
-- map), where @n@ is @nOf Col xs@.
--
-- The divisor follows the reference formula quoted in the comments of
-- 'conditional': @(xs +/- ys) / (2 * (x.shape[0] - 1))@. The previous code
-- wrote @(2*n -1)@, which parses as @2n - 1@ and did not match that formula.
--
-- NOTE(review): assumes a square matrix, so that the column count equals
-- @x.shape[0]@ of the reference formula. For a matrix with a single column
-- the divisor is zero (the score is undefined with only one node); confirm
-- callers never pass such a matrix.
opWith :: (Fractional a1, Num a1)
       => (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2 * (n - 1))) (xs `op` ys)
  where
    -- Number of columns of the first operand, converted to the element type.
    n = fromIntegral $ nOf Col xs
112 ---------------------------------------------------------------