]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Graph/Distances/Conditional.hs
[FEAT/Distances] Accelerate lib for GPU: conditional and distributional. Needs behavi...
[gargantext.git] / src / Gargantext / Graph / Distances / Conditional.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Conditional
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11 -}
12
13 {-# LANGUAGE BangPatterns #-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE FlexibleContexts #-}
16 {-# LANGUAGE Strict #-}
17 module Gargantext.Graph.Distances.Conditional
18 where
19
20 import Data.Matrix hiding (identity)
21 import Data.String.Conversions (ConvertibleStrings(..))
22
23 import Data.List (concat, sortOn)
24 import qualified Data.List as L
25
26 import Data.Map (Map)
27 import qualified Data.Map as M
28
29 import Data.Set (Set)
30 import qualified Data.Set as S
31
32 import Data.Vector (Vector)
33 import qualified Data.Vector as V
34
35 import Gargantext.Prelude
36 import Gargantext.Graph.Utils
37
38 ------------------------------------------------------------------------
39 ------------------------------------------------------------------------
40 -- | Optimisation issue
41
42 toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
43 toBeOptimized m = proba Col m
44
45 ------------------------------------------------------------------------
46 -- | Main Functions
47 -- Compute the probability from axis
48 -- x' = x / (sum Col x)
49 proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
50 proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
51
52
53 mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
54 mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
55 where
56 f' m' c = mapOnly a f c m'
57
58 mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
59 mapOnly Col = mapCol
60 mapOnly Row = mapRow
61
62 mapAll :: (a -> a) -> Matrix a -> Matrix a
63 mapAll f m = mapOn Col (\_ -> f) m
64
65
66 ---------------------------------------------------------------
67 -- | Compute a distance from axis
68 -- xs = (sum Col x') - x'
69 distFromSum :: (Num a, Fractional a)
70 => Axis -> Matrix a -> Matrix a
71 distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
72 ---------------------------------------------------------------
73 ---------------------------------------------------------------
74 -- | To compute included/excluded or specific/generic scores
75 opWith :: (Fractional a1, Num a1)
76 => (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
77 opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
78 where
79 n = fromIntegral $ nOf Col xs
80 ---------------------------------------------------------------
81
82
83 -------------------------------------------------------
84 conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
85 conditional m = filter (threshold m') m'
86 where
87 ------------------------------------------------------------------------
88 -- | Main Operations
89 -- x' = x / (sum Col x)
90 x' = proba Col m
91
92 ------------------------------------------------------------------------
93 -- xs = (sum Col x') - x'
94 xs = distFromSum Col x'
95 -- ys = (sum Row x') - x'
96 ys = distFromSum Row x'
97
98 ------------------------------------------------------------------------
99 -- | Top included or excluded
100 ie = opWith (+) xs ys
101 -- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
102
103 -- | Top specific or generic
104 sg = opWith (-) xs ys
105 -- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
106
107 nodes_kept :: [Int]
108 nodes_kept = take k' $ S.toList
109 $ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
110 $ map fst
111 $ nodes_included k <> nodes_specific k
112
113 nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
114 nodes_specific m = take m $ sortOn snd $ toListsWithIndex sg
115 insert as s = foldl' (\s' a -> S.insert a s') s as
116 k' = 2*k
117 k = 10
118
119 dico_nodes :: Map Int Int
120 dico_nodes = M.fromList $ zip [1..] nodes_kept
121 dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
122
123 m' = matrix (length nodes_kept)
124 (length nodes_kept)
125 (\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
126
127 threshold m = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m) (V.enumFromTo 1 (nOf Col m))
128
129 filter t m = mapAll (\x -> filter' t x) m
130 where
131 filter' t x = case (x >= t) of
132 True -> x
133 False -> 0
134
135 ------------------------------------------------------------------------