2 Module : Gargantext.Graph.Distances.Matrix
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Motivation and definition of the @Conditional@ distance.
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
36 module Gargantext.Viz.Graph.Distances.Matrice
39 import Data.Array.Accelerate
40 import Data.Array.Accelerate.Interpreter (run)
41 import Data.Array.Accelerate.Smart
42 import Data.Array.Accelerate.Type
43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
45 import Data.Maybe (Maybe(Just))
46 import qualified Gargantext.Prelude as P
47 import qualified Data.Array.Accelerate.Array.Representation as Repr
49 import Gargantext.Text.Metrics.Count
52 -----------------------------------------------------------------------
54 distriTest = distributional $ myMat 100
55 -----------------------------------------------------------------------
57 vector :: Int -> (Array (Z :. Int) Int)
58 vector n = fromList (Z :. n) [0..n]
60 matrix :: Elt c => Int -> [c] -> Matrix c
61 matrix n l = fromList (Z :. n :. n) l
63 myMat :: Int -> Matrix Int
64 myMat n = matrix n [1..]
66 -- | Two ways to get the rank (as documentation)
67 rank :: (Matrix a) -> Int
68 rank m = arrayRank $ arrayShape m
70 rank' :: (Matrix a) -> Int
73 Z :. _ :. n = arrayShape m
75 -----------------------------------------------------------------------
76 -- | Conditional Distance
80 proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
81 proba r mat = zipWith (/) mat (mkSum r mat)
83 mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
84 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
86 divByDiag :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
87 divByDiag r mat = zipWith (/) mat (replicate (constant (Z :. (r :: Int) :. All)) $ diag mat)
89 diag :: forall e. Elt e => Acc (Matrix e) -> Acc (Vector e)
90 diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) (m :: Acc (Array DIM2 e))
93 type Matrix' a = Acc (Matrix a)
94 type InclusionExclusion = Double
95 type SpecificityGenericity = Double
98 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
99 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
101 miniMax' = (the $ minimum $ maximum m)
103 -- | Conditional distance (basic version)
104 conditional :: Matrix Int -> Matrix Double
105 conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
111 -- | Conditional distance (advanced version)
112 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
113 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
116 ie :: Matrix' Double -> Matrix' Double
117 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
118 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
119 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
127 xs :: Matrix' Double -> Matrix' Double
128 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
129 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
130 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
132 -----------------------------------------------------------------------
134 -- | Distributional Distance
135 distributional :: Matrix Int -> Matrix Double
136 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
140 filter m = zipWith (\a b -> max a b) m (transpose m)
142 ri mat = zipWith (/) mat1 mat2
144 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
147 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
148 $ zipWith (/) (crossProduct m') (total m')
150 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
152 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
153 cross mat = zipWith (-) (mkSum n mat) (mat)
156 int2double :: Matrix Int -> Matrix Double
157 int2double m = run (map fromIntegral $ use m)
160 Metric Specificity and genericity: select terms
161 Compute genericity/specificity:
162 P(j|i) = N(ij) / N(ii)
163 P(i|j) = N(ij) / N(jj)
165 Gen(i) = Mean{j} P(j_k|i)
166 Spec(i) = Mean{j} P(i|j_k)
168 Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
169 Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
174 incExcSpeGen' :: Matrix Int -> (Vector Double, Vector Double)
175 incExcSpeGen' m = (run' ie m, run' sg m)
177 run' fun mat = run $ fun $ map fromIntegral $ use mat
179 ie :: Acc (Matrix Double) -> Acc (Vector Double)
180 ie mat = zipWith (-) (pV mat) (pH mat)
182 sg :: Acc (Matrix Double) -> Acc (Vector Double)
183 sg mat = zipWith (+) (pV mat) (pH mat)
186 n = constant (P.fromIntegral (rank' m - 1) :: Double)
188 pV :: Acc (Matrix Double) -> Acc (Vector Double)
189 pV mat = map (\x -> (x-1)/n) $ sum $ divByDiag (rank' m) mat
191 pH :: Acc (Matrix Double) -> Acc (Vector Double)
192 pH mat = map (\x -> (x-1)/n) $ sum $ transpose $ divByDiag (rank' m) mat
196 incExcSpeGen_proba :: Matrix Int -> Matrix Double
197 incExcSpeGen_proba m = run' pro m
199 run' fun mat = run $ fun $ map fromIntegral $ use mat
201 pro mat = divByDiag (rank' m) mat