src/Gargantext/Viz/Graph/Distances/Matrice.hs

   1 {-|
   2 Module      : Gargantext.Graph.Distances.Matrix
   3 Description :
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 Motivation and definition of the @Conditional@ distance.
  11
  12 Implementation use Accelerate library :
  13   * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
  14     [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
  15     In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
  16
  17   * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
  18     [Optimising Purely Functional GPU Programs][MCKL13].
  19     In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
  20
  21   * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
  22     [Embedding Foreign Code][CMCK14].
  23     In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
  24
  25   * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
  26     [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
  27     In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
  28
  29 -}
  30
  31 {-# LANGUAGE NoImplicitPrelude #-}
  32 {-# LANGUAGE FlexibleContexts  #-}
  33 {-# LANGUAGE TypeFamilies      #-}
  34 {-# LANGUAGE TypeOperators     #-}
  35
  36 module Gargantext.Viz.Graph.Distances.Matrice
  37   where
  38
  39 import Data.Array.Accelerate
  40 import Data.Array.Accelerate.Interpreter (run)
  41 import Data.Array.Accelerate.Smart
  42 import Data.Array.Accelerate.Type
  43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
  44
  45 import Data.Maybe (Maybe(Just))
  46 import qualified Gargantext.Prelude as P
  47 import qualified Data.Array.Accelerate.Array.Representation     as Repr
  48
  49 import Gargantext.Text.Metrics.Count
  50
  51
  52 -----------------------------------------------------------------------
  53 -- Test perf.
  54 distriTest = distributional $ myMat 100
  55 -----------------------------------------------------------------------
  56
  57 vector :: Int -> (Array (Z :. Int) Int)
  58 vector n = fromList (Z :. n) [0..n]
  59
  60 matrix :: Elt c => Int -> [c] -> Matrix c
  61 matrix n l = fromList (Z :. n :. n) l
  62
  63 myMat :: Int -> Matrix Int
  64 myMat n = matrix n [1..]
  65
  66 -- | Two ways to get the rank (as documentation)
  67 rank :: (Matrix a) -> Int
  68 rank m = arrayRank $ arrayShape m
  69
  70 -----------------------------------------------------------------------
  71 -- | Dimension of a square Matrix
  72 -- How to force use with SquareMatrix ?
  73 type Dim = Int
  74
  75 dim :: (Matrix a) -> Dim
  76 dim m = n
  77   where
  78     Z :. _ :. n = arrayShape m
  79     -- == indexTail (arrayShape m)
  80
  81 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  82 proba r mat = zipWith (/) mat (mkSum r mat)
  83
  84 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  85 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
  86
  87 -- divByDiag
  88 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  89 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
  90   where
  91     diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
  92     diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
  93
  94 -- | Conditional Distance
  95
  96 {-
  97 Metric Specificity and genericity: select terms
  98
  99 N termes
 100
 101 Ni : occ de i
 102
 103 Nij : cooc i et j
 104
 105 P(i|j)=Nij/Nj Probability to get i given j
 106
 107 Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
 108
 109 Spec(i) : 1/(N-1)*Sum( j!=i, P(j|i)) : Specificity of j
 110
 111 Inclusion (i) = Gen(i)+Spec(i)
 112
 113 Genericity score = Gen(i)- Spec(i)
 114
 115
 116 ----
 117
 118    Compute genericity/specificity:
 119         P(j|i) = N(ij) / N(ii)
 120         P(i|j) = N(ij) / N(jj)
 121
 122         Gen(i)  = sum P(i|j) | j /= i) / (N-1)
 123         Spec(i) = sum P(j|i) | i /= j) / (N-1)
 124
 125         Genericity(i) = (Gen(i) - Spe(i)) / 2
 126         Inclusion(i)  = (Spec(i) + Gen(i)) / 2
 127
 128 -}
 129
 130 -- M - M-1 = 0
 131 data SquareMatrix    = SymetricMatrix | NonSymetricMatrix
 132
 133 type SymetricMatrix    = Matrix
 134 type NonSymetricMatrix = Matrix
 135
 136 -- |    Compute genericity/specificity:
 137 ---- |        P(i|j) = N(ij) / N(jj)
 138 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 139 p_ij m = zipWith (/) m (n_jj m)
 140   where
 141     n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 142     n_jj m = backpermute (shape m)
 143                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 144                                    -> ifThenElse (i < j) (Z :. j :. j) (Z :. i :. i)
 145                                 )
 146                          ) m
 147
 148 --        P(j|i) = N(ij) / N(ii)
 149 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 150 p_ji m = zipWith (/) m (n_ii m)
 151   where
 152     n_ii :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 153     n_ii m = backpermute (shape m)
 154                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 155                                    -> (Z :. i :. i)
 156                                 )
 157                          ) m
 158
 159
 160 type Matrix' a = Acc (Matrix a)
 161 type InclusionExclusion    = Double
 162 type SpecificityGenericity = Double
 163
 164
 165 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
 166 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
 167   where
 168     miniMax' = (the $ minimum $ maximum m)
 169
 170 -- | Conditional distance (basic version)
 171 conditional :: Matrix Int -> Matrix Double
 172 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
 173
 174
 175 -- | Conditional distance (advanced version)
 176 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 177 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
 178   where
 179
 180     ie :: Matrix' Double -> Matrix' Double
 181     ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
 182     sg :: Acc (Matrix Double) -> Acc (Matrix Double)
 183     sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
 184
 185     n :: Exp Double
 186     n = P.fromIntegral r
 187
 188     r :: Dim
 189     r = dim m
 190
 191     xs :: Matrix' Double -> Matrix' Double
 192     xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
 193     ys :: Acc (Matrix Double) -> Acc (Matrix Double)
 194     ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
 195
 196 -----------------------------------------------------------------------
 197
 198 -- | Distributional Distance
 199 distributional :: Matrix Int -> Matrix Double
 200 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
 201   where
 202     n    = dim m
 203
 204     filter  m = zipWith (\a b -> max a b) m (transpose m)
 205
 206     ri mat = zipWith (/) mat1 mat2
 207       where
 208         mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
 209         mat2 = mkSum n mat
 210
 211     mi    m'  = zipWith (\a b -> max (log $ a/b) 0)  m'
 212               $ zipWith (/) (crossProduct m') (total m')
 213
 214     total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
 215
 216     crossProduct m = zipWith (*) (cross m  ) (cross (transpose m))
 217     cross mat      = zipWith (-) (mkSum n mat) (mat)
 218
 219
 220 int2double :: Matrix Int -> Matrix Double
 221 int2double m = run (map fromIntegral $ use m)
 222
 223 incExcSpeGen' :: Matrix Int -> (Vector Double, Vector Double)
 224 incExcSpeGen' m = (run' ie m, run' sg m)
 225   where
 226     run' fun mat = run $ fun $ map fromIntegral $ use mat
 227
 228     ie :: Acc (Matrix Double) -> Acc (Vector Double)
 229     ie mat = zipWith (-) (pV mat) (pH mat)
 230 --
 231     sg :: Acc (Matrix Double) -> Acc (Vector Double)
 232     sg mat = zipWith (+) (pV mat) (pH mat)
 233
 234     n :: Exp Double
 235     n = constant (P.fromIntegral (dim m) :: Double)
 236
 237     pV :: Acc (Matrix Double) -> Acc (Vector Double)
 238     pV mat = map (\x -> (x-1)/(n-1)) $ sum $ p_ij mat
 239
 240     pH :: Acc (Matrix Double) -> Acc (Vector Double)
 241     pH mat = map (\x -> (x-1)/(n-1)) $ sum $ p_ji mat
 242
 243
 244 incExcSpeGen_proba :: Matrix Int -> Matrix Double
 245 incExcSpeGen_proba m = run' pro m
 246   where
 247     run' fun mat = run $ fun $ map fromIntegral $ use mat
 248
 249     pro mat = p_ij mat