]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Methods/Distances/Conditional.hs
[FEAT] Distributional Measure Similarity in Graph Flow (WIP)
[gargantext.git] / src / Gargantext / Core / Methods / Distances / Conditional.hs
1 {-|
2 Module : Gargantext.Core.Methods.Distances
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11 -}
12
13 {-# LANGUAGE BangPatterns #-}
14 {-# LANGUAGE Strict #-}
15 module Gargantext.Core.Methods.Distances.Conditional
16 where
17
18 import Data.List (sortOn)
19 import Data.Map (Map)
20 import Data.Matrix hiding (identity)
21 import Gargantext.Core.Viz.Graph.Utils
22 import Gargantext.Prelude
23 import qualified Data.Map as M
24 import qualified Data.Set as S
25 import qualified Data.Vector as V
26
27 ------------------------------------------------------------------------
28 -- | Optimisation issue
29 toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
30 toBeOptimized m = proba Col m
31
32 ------------------------------------------------------------------------
33 -- | Main Functions
34 -- Compute the probability from axis
35 -- x' = x / (sum Col x)
36 proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
37 proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
38
39
40 mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
41 mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
42 where
43 f' m' c = mapOnly a f c m'
44
45 mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
46 mapOnly Col = mapCol
47 mapOnly Row = mapRow
48
49 mapAll :: (a -> a) -> Matrix a -> Matrix a
50 mapAll f m = mapOn Col (\_ -> f) m
51
52 ---------------------------------------------------------------
53 -- | Compute a distance from axis
54 -- xs = (sum Col x') - x'
55 distFromSum :: (Num a, Fractional a)
56 => Axis -> Matrix a -> Matrix a
57 distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
58 ---------------------------------------------------------------
59 ---------------------------------------------------------------
60 -- | To compute included/excluded or specific/generic scores
61 opWith :: (Fractional a1, Num a1)
62 => (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
63 opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
64 where
65 n = fromIntegral $ nOf Col xs
66 ---------------------------------------------------------------
67
68
69 -------------------------------------------------------
70 conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
71 conditional m = filterMat (threshold m') m'
72 where
73 ------------------------------------------------------------------------
74 -- | Main Operations
75 -- x' = x / (sum Col x)
76 x' = proba Col m
77
78 ------------------------------------------------------------------------
79 -- xs = (sum Col x') - x'
80 xs = distFromSum Col x'
81 -- ys = (sum Row x') - x'
82 ys = distFromSum Row x'
83
84 ------------------------------------------------------------------------
85 -- | Top included or excluded
86 ie = opWith (+) xs ys
87 -- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
88
89 -- | Top specific or generic
90 sg = opWith (-) xs ys
91 -- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
92
93 nodes_kept :: [Int]
94 nodes_kept = take k' $ S.toList
95 $ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
96 $ map fst
97 $ nodes_included k <> nodes_specific k
98
99 nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
100 nodes_specific n = take n $ sortOn snd $ toListsWithIndex sg
101 insert as s = foldl' (\s' a -> S.insert a s') s as
102 k' = 2*k
103 k = 10
104
105 dico_nodes :: Map Int Int
106 dico_nodes = M.fromList $ zip ([1..] :: [Int]) nodes_kept
107 --dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
108
109 m' = matrix (length nodes_kept)
110 (length nodes_kept)
111 (\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
112
113 threshold m'' = V.minimum
114 $ V.map (\cId -> V.maximum $ getCol cId m'')
115 (V.enumFromTo 1 (nOf Col m'') )
116
117 filterMat t m'' = mapAll (\x -> filter' t x) m''
118 where
119 filter' t' x = case (x >= t') of
120 True -> x
121 False -> 0
122 ------------------------------------------------------------------------