src/Gargantext/Viz/Graph/Distances/Matrice.hs

   1 {-|
   2 Module      : Gargantext.Graph.Distances.Matrix
   3 Description :
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 Motivation and definition of the @Conditional@ distance.
  11
  12 Implementation use Accelerate library :
  13   * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
  14     [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
  15     In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
  16
  17   * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
  18     [Optimising Purely Functional GPU Programs][MCKL13].
  19     In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
  20
  21   * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
  22     [Embedding Foreign Code][CMCK14].
  23     In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
  24
  25   * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
  26     [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
  27     In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
  28
  29 -}
  30
  31 {-# LANGUAGE NoImplicitPrelude #-}
  32 {-# LANGUAGE FlexibleContexts  #-}
  33 {-# LANGUAGE TypeFamilies      #-}
  34 {-# LANGUAGE TypeOperators     #-}
  35
  36 module Gargantext.Viz.Graph.Distances.Matrice
  37   where
  38
  39 import Data.Array.Accelerate
  40 import Data.Array.Accelerate.Interpreter (run)
  41 import Data.Array.Accelerate.Smart
  42 import Data.Array.Accelerate.Type
  43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
  44
  45 import Data.Maybe (Maybe(Just))
  46 import qualified Gargantext.Prelude as P
  47 import qualified Data.Array.Accelerate.Array.Representation     as Repr
  48
  49 import Gargantext.Text.Metrics.Count
  50
  51
  52 -----------------------------------------------------------------------
  53 -- Test perf.
  54 distriTest = distributional $ myMat 100
  55 -----------------------------------------------------------------------
  56
  57 vector :: Int -> (Array (Z :. Int) Int)
  58 vector n = fromList (Z :. n) [0..n]
  59
  60 matrix :: Elt c => Int -> [c] -> Matrix c
  61 matrix n l = fromList (Z :. n :. n) l
  62
  63 myMat :: Int -> Matrix Int
  64 myMat n = matrix n [1..]
  65
  66 -- | Two ways to get the rank (as documentation)
  67 rank :: (Matrix a) -> Int
  68 rank m = arrayRank $ arrayShape m
  69
  70 -----------------------------------------------------------------------
  71 -- | Conditional Distance
  72
  73 -- | Dimension of a square Matrix
  74 -- How to force use with SquareMatrix ?
  75 type Dim = Int
  76
  77 dim :: (Matrix a) -> Dim
  78 dim m = n
  79   where
  80     Z :. _ :. n = arrayShape m
  81     -- == indexTail (arrayShape m)
  82
  83 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  84 proba r mat = zipWith (/) mat (mkSum r mat)
  85
  86 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  87 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
  88
  89 -- divByDiag
  90 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  91 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
  92
  93 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
  94 diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
  95
  96 {-
  97 Metric Specificity and genericity: select terms
  98    Compute genericity/specificity:
  99         P(j|i) = N(ij) / N(ii)
 100         P(i|j) = N(ij) / N(jj)
 101
 102         Gen(i)  = sum P(i|j) | j /= i) / (N-1)
 103         Spec(i) = sum P(j|i) | i /= j) / (N-1)
 104
 105         Genericity(i) = (Gen(i) - Spe(i)) / 2
 106         Inclusion(i)  = (Spec(i) + Gen(i)) / 2
 107
 108 -}
 109
 110 -- M - M-1 = 0
 111 data SquareMatrix    = SymetricMatrix | NonSymetricMatrix
 112
 113 type SymetricMatrix    = Matrix
 114 type NonSymetricMatrix = Matrix
 115
 116 -- |    Compute genericity/specificity:
 117 ---- |        P(i|j) = N(ij) / N(jj)
 118 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 119 p_ij m = zipWith (/) m (n_jj m)
 120   where
 121     n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 122     n_jj m = backpermute (shape m)
 123                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 124                                    -> (Z :. j :. j)
 125                                 )
 126                          ) m
 127
 128 --        P(j|i) = N(ij) / N(ii)
 129 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 130 p_ji m = zipWith (/) m (n_ii m)
 131   where
 132     n_ii :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 133     n_ii m = backpermute (shape m)
 134                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 135                                    -> (Z :. i :. i)
 136                                 )
 137                          ) m
 138
 139
 140 type Matrix' a = Acc (Matrix a)
 141 type InclusionExclusion    = Double
 142 type SpecificityGenericity = Double
 143
 144
 145 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
 146 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
 147   where
 148     miniMax' = (the $ minimum $ maximum m)
 149
 150 -- | Conditional distance (basic version)
 151 conditional :: Matrix Int -> Matrix Double
 152 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
 153
 154
 155 -- | Conditional distance (advanced version)
 156 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 157 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
 158   where
 159
 160     ie :: Matrix' Double -> Matrix' Double
 161     ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
 162     sg :: Acc (Matrix Double) -> Acc (Matrix Double)
 163     sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
 164
 165     n :: Exp Double
 166     n = P.fromIntegral r
 167
 168     r :: Dim
 169     r = dim m
 170
 171     xs :: Matrix' Double -> Matrix' Double
 172     xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
 173     ys :: Acc (Matrix Double) -> Acc (Matrix Double)
 174     ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
 175
 176 -----------------------------------------------------------------------
 177
 178 -- | Distributional Distance
 179 distributional :: Matrix Int -> Matrix Double
 180 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
 181   where
 182     n    = dim m
 183
 184     filter  m = zipWith (\a b -> max a b) m (transpose m)
 185
 186     ri mat = zipWith (/) mat1 mat2
 187       where
 188         mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
 189         mat2 = mkSum n mat
 190
 191     mi    m'  = zipWith (\a b -> max (log $ a/b) 0)  m'
 192               $ zipWith (/) (crossProduct m') (total m')
 193
 194     total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
 195
 196     crossProduct m = zipWith (*) (cross m  ) (cross (transpose m))
 197     cross mat      = zipWith (-) (mkSum n mat) (mat)
 198
 199
 200 int2double :: Matrix Int -> Matrix Double
 201 int2double m = run (map fromIntegral $ use m)
 202
 203 incExcSpeGen' :: Matrix Int -> (Vector Double, Vector Double)
 204 incExcSpeGen' m = (run' ie m, run' sg m)
 205   where
 206     run' fun mat = run $ fun $ map fromIntegral $ use mat
 207
 208     ie :: Acc (Matrix Double) -> Acc (Vector Double)
 209     ie mat = zipWith (-) (pV mat) (pH mat)
 210 --
 211     sg :: Acc (Matrix Double) -> Acc (Vector Double)
 212     sg mat = zipWith (+) (pV mat) (pH mat)
 213
 214     n :: Exp Double
 215     n = constant (P.fromIntegral (dim m) :: Double)
 216
 217     pV :: Acc (Matrix Double) -> Acc (Vector Double)
 218     pV mat = map (\x -> (x-1)/(n-1)) $ sum $ p_ij mat
 219
 220     pH :: Acc (Matrix Double) -> Acc (Vector Double)
 221     pH mat = map (\x -> (x-1)/(n-1)) $ sum $ p_ji mat
 222
 223
 224 incExcSpeGen_proba :: Matrix Int -> Matrix Double
 225 incExcSpeGen_proba m = run' pro m
 226   where
 227     run' fun mat = run $ fun $ map fromIntegral $ use mat
 228
 229     pro mat = p_ij mat