src/Gargantext/Viz/Graph/Distances/Matrice.hs

   1 {-|
   2 Module      : Gargantext.Graph.Distances.Matrix
   3 Description :
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 Motivation and definition of the @Conditional@ distance.
  11
  12 Implementation use Accelerate library :
  13   * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
  14     [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
  15     In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
  16
  17   * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
  18     [Optimising Purely Functional GPU Programs][MCKL13].
  19     In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
  20
  21   * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
  22     [Embedding Foreign Code][CMCK14].
  23     In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
  24
  25   * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
  26     [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
  27     In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
  28
  29 -}
  30
  31 {-# LANGUAGE NoImplicitPrelude #-}
  32 {-# LANGUAGE FlexibleContexts  #-}
  33 {-# LANGUAGE TypeFamilies      #-}
  34 {-# LANGUAGE TypeOperators     #-}
  35
  36 module Gargantext.Viz.Graph.Distances.Matrice
  37   where
  38
  39 import Data.Array.Accelerate
  40 import Data.Array.Accelerate.Interpreter (run)
  41 import Data.Array.Accelerate.Smart
  42 import Data.Array.Accelerate.Type
  43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
  44
  45 import Data.Maybe (Maybe(Just))
  46 import qualified Gargantext.Prelude as P
  47 import qualified Data.Array.Accelerate.Array.Representation     as Repr
  48
  49 import Gargantext.Text.Metrics.Count
  50
  51
  52 -----------------------------------------------------------------------
  53 -- Test perf.
  54 distriTest = distributional $ myMat 100
  55 -----------------------------------------------------------------------
  56
  57 vector :: Int -> (Array (Z :. Int) Int)
  58 vector n = fromList (Z :. n) [0..n]
  59
  60 matrix :: Elt c => Int -> [c] -> Matrix c
  61 matrix n l = fromList (Z :. n :. n) l
  62
  63 myMat :: Int -> Matrix Int
  64 myMat n = matrix n [1..]
  65
  66 -- | Two ways to get the rank (as documentation)
  67 rank :: (Matrix a) -> Int
  68 rank m = arrayRank $ arrayShape m
  69
  70 -----------------------------------------------------------------------
  71 -- | Dimension of a square Matrix
  72 -- How to force use with SquareMatrix ?
  73 type Dim = Int
  74
  75 dim :: (Matrix a) -> Dim
  76 dim m = n
  77   where
  78     Z :. _ :. n = arrayShape m
  79     -- == indexTail (arrayShape m)
  80
  81 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  82 proba r mat = zipWith (/) mat (mkSum r mat)
  83
  84 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  85 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
  86
  87 -- divByDiag
  88 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  89 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
  90   where
  91     diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
  92     diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
  93
  94 -- | Conditional Distance
  95
  96 {-
  97 Metric Specificity and genericity: select terms
  98
  99 N termes
 100
 101 Ni : occ de i
 102
 103 Nij : cooc i et j
 104
 105 P(i|j)=Nij/Nj Probability to get i given j
 106
 107 Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
 108
 109 Spec(i) : 1/(N-1)*Sum( j!=i, P(j|i)) : Specificity of j
 110
 111 Inclusion (i) = Gen(i)+Spec(i)
 112
 113 Genericity score = Gen(i)- Spec(i)
 114
 115
 116 ----
 117
 118    Compute genericity/specificity:
 119         P(j|i) = N(ij) / N(ii)
 120         P(i|j) = N(ij) / N(jj)
 121
 122         Gen(i)  = sum P(i|j) | j /= i) / (N-1)
 123         Spec(i) = sum P(j|i) | i /= j) / (N-1)
 124
 125         Genericity(i) = (Gen(i) - Spe(i)) / 2
 126         Inclusion(i)  = (Spec(i) + Gen(i)) / 2
 127
 128 -}
 129
 130 -- M - M-1 = 0
 131 data SquareMatrix    = SymetricMatrix | NonSymetricMatrix
 132
 133 type SymetricMatrix    = Matrix
 134 type NonSymetricMatrix = Matrix
 135
 136 -- |    Compute genericity/specificity:
 137 --p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 138 --p_ m = zipWith (/) m (n_jj m)
 139 --  where
 140 --    n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 141 --    n_jj m = backpermute (shape m)
 142 --                         (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 143 --                                   -> ifThenElse (i < j) (Z :. j :. j) (Z :. i :. i)
 144 --                                )
 145 --                         ) m
 146
 147
 148
 149
 150 ---- |        P(i|j) = N(ij) / N(jj)
 151 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 152 p_ij m = zipWith (/) m (n_jj m)
 153   where
 154     n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 155     n_jj m = backpermute (shape m)
 156                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 157                                    -> (Z :. j :. j)
 158                                 )
 159                          ) m
 160
 161 --        P(j|i) = N(ij) / N(ii)
 162 -- to test
 163 p_ji' :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 164 p_ji' = transpose . p_ij
 165
 166 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 167 p_ji m = zipWith (/) m (n_jj m)
 168   where
 169     n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 170     n_jj m = backpermute (shape m)
 171                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 172                                    -> (Z :. i :. i)
 173                                 )
 174                          ) m
 175
 176
 177
 178 type Matrix' a = Acc (Matrix a)
 179 type InclusionExclusion    = Double
 180 type SpecificityGenericity = Double
 181
 182
 183 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
 184 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
 185   where
 186     miniMax' = (the $ minimum $ maximum m)
 187
 188 -- | Conditional distance (basic version)
 189 conditional :: Matrix Int -> Matrix Double
 190 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
 191
 192
 193 -- | Conditional distance (advanced version)
 194 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 195 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
 196   where
 197
 198     ie :: Matrix' Double -> Matrix' Double
 199     ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
 200     sg :: Acc (Matrix Double) -> Acc (Matrix Double)
 201     sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
 202
 203     n :: Exp Double
 204     n = P.fromIntegral r
 205
 206     r :: Dim
 207     r = dim m
 208
 209     xs :: Matrix' Double -> Matrix' Double
 210     xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
 211     ys :: Acc (Matrix Double) -> Acc (Matrix Double)
 212     ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
 213
 214 -----------------------------------------------------------------------
 215
 216 -- | Distributional Distance
 217 distributional :: Matrix Int -> Matrix Double
 218 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
 219   where
 220     n    = dim m
 221
 222     filter  m = zipWith (\a b -> max a b) m (transpose m)
 223
 224     ri mat = zipWith (/) mat1 mat2
 225       where
 226         mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
 227         mat2 = mkSum n mat
 228
 229     mi    m'  = zipWith (\a b -> max (log $ a/b) 0)  m'
 230               $ zipWith (/) (crossProduct m') (total m')
 231
 232     total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
 233
 234     crossProduct m = zipWith (*) (cross m  ) (cross (transpose m))
 235     cross mat      = zipWith (-) (mkSum n mat) (mat)
 236
 237
 238 int2double :: Matrix Int -> Matrix Double
 239 int2double m = run (map fromIntegral $ use m)
 240
 241 incExcSpeGen' :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
 242 incExcSpeGen' m = (run' ie m, run' sg m)
 243   where
 244     run' fun mat = run $ fun $ map fromIntegral $ use mat
 245
 246     ie :: Acc (Matrix Double) -> Acc (Vector Double)
 247     ie mat = zipWith (+) (pV mat) (pH mat)
 248 --
 249     sg :: Acc (Matrix Double) -> Acc (Vector Double)
 250     sg mat = zipWith (-) (pV mat) (pH mat)
 251
 252     n :: Exp Double
 253     n = constant (P.fromIntegral (dim m) :: Double)
 254
 255     pV :: Acc (Matrix Double) -> Acc (Vector Double)
 256     pV mat = map (\x -> (x-1)/(n-1)) $ sum $ p_ij mat
 257
 258     pH :: Acc (Matrix Double) -> Acc (Vector Double)
 259     pH mat = map (\x -> (x-1)/(n-1)) $ sum $ transpose $ p_ij mat
 260
 261
 262 incExcSpeGen_proba :: Matrix Int -> Matrix Double
 263 incExcSpeGen_proba m = run' pro m
 264   where
 265     run' fun mat = run $ fun $ map fromIntegral $ use mat
 266
 267     pro mat = p_ji mat