]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Graph/Distances/Conditional.hs
[CLEAN] Graph: unoptmized distances using Data.Matrix (conditional and
[gargantext.git] / src / Gargantext / Graph / Distances / Conditional.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Conditional
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11 -}
12
13 {-# LANGUAGE BangPatterns #-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE FlexibleContexts #-}
16 {-# LANGUAGE Strict #-}
17 module Gargantext.Graph.Distances.Conditional
18 where
19
20 import Data.Matrix hiding (identity)
21 import Data.String.Conversions (ConvertibleStrings(..))
22
23 import Data.List (concat, sortOn)
24 import qualified Data.List as L
25
26 import Data.Map (Map)
27 import qualified Data.Map as M
28
29 import Data.Set (Set)
30 import qualified Data.Set as S
31
32 import Data.Vector (Vector)
33 import qualified Data.Vector as V
34
35 import Gargantext.Prelude
36 import Gargantext.Graph.Utils
37 ------------------------------------------------------------------------
38 -------------------------------------------------------
39 conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
40 conditional m = x' -- filter (threshold m') m'
41 where
42 ------------------------------------------------------------------------
43 -- | Main Operations
44 -- x' = x / (sum Col x)
45 x' = proba Col m
46
47 ------------------------------------------------------------------------
48 -- xs = (sum Col x') - x'
49 xs = distFromSum Col x'
50 -- ys = (sum Row x') - x'
51 ys = distFromSum Row x'
52
53 ------------------------------------------------------------------------
54 -- | Top included or excluded
55 ie = opWith (+) xs ys
56 -- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
57
58 -- | Top specific or generic
59 sg = opWith (-) xs ys
60 -- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
61
62 nodes_kept :: [Int]
63 nodes_kept = take k' $ S.toList
64 $ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
65 $ map fst
66 $ nodes_included k <> nodes_specific k
67
68 nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
69 nodes_specific m = take m $ sortOn snd $ toListsWithIndex sg
70 insert as s = foldl' (\s' a -> S.insert a s') s as
71 k' = 2*k
72 k = 10
73
74 dico_nodes :: Map Int Int
75 dico_nodes = M.fromList $ zip [1..] nodes_kept
76 dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
77
78 m' = matrix (length nodes_kept)
79 (length nodes_kept)
80 (\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
81
82 threshold m = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m) (V.enumFromTo 1 (nOf Col m))
83
84 filter t m = mapAll (\x -> filter' t x) m
85 where
86 filter' t x = case (x >= t) of
87 True -> x
88 False -> 0
89
90 ------------------------------------------------------------------------
91 ------------------------------------------------------------------------
92 -- | Main Functions
93 -- Compute the probability from axis
94 -- x' = x / (sum Col x)
95 proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
96 proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
97
98 ---------------------------------------------------------------
99 -- | Compute a distance from axis
100 -- xs = (sum Col x') - x'
101 distFromSum :: (Num a, Fractional a)
102 => Axis -> Matrix a -> Matrix a
103 distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
104 ---------------------------------------------------------------
105 ---------------------------------------------------------------
106 -- | To compute included/excluded or specific/generic scores
107 opWith :: (Fractional a1, Num a1)
108 => (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
109 opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
110 where
111 n = fromIntegral $ nOf Col xs
112 ---------------------------------------------------------------
113