src/Gargantext/Viz/Graph/Distances/Matrice.hs

   1 {-|
   2 Module      : Gargantext.Graph.Distances.Matrix
   3 Description :
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 Motivation and definition of the @Conditional@ distance.
  11
  12 Implementation use Accelerate library :
  13   * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
  14     [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
  15     In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
  16
  17   * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
  18     [Optimising Purely Functional GPU Programs][MCKL13].
  19     In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
  20
  21   * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
  22     [Embedding Foreign Code][CMCK14].
  23     In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
  24
  25   * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
  26     [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
  27     In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
  28
  29 -}
  30
  31 {-# LANGUAGE NoImplicitPrelude   #-}
  32 {-# LANGUAGE FlexibleContexts    #-}
  33 {-# LANGUAGE TypeFamilies        #-}
  34 {-# LANGUAGE TypeOperators       #-}
  35 {-# LANGUAGE ScopedTypeVariables #-}
  36
  37 module Gargantext.Viz.Graph.Distances.Matrice
  38   where
  39
  40 import Data.Array.Accelerate
  41 import Data.Array.Accelerate.Interpreter (run)
  42
  43 import qualified Gargantext.Prelude as P
  44
  45
  46 -----------------------------------------------------------------------
  47 -- Test perf.
  48 distriTest :: Matrix Double
  49 distriTest = distributional $ myMat 100
  50 -----------------------------------------------------------------------
  51
  52 vector :: Int -> (Array (Z :. Int) Int)
  53 vector n = fromList (Z :. n) [0..n]
  54
  55 matrix :: Elt c => Int -> [c] -> Matrix c
  56 matrix n l = fromList (Z :. n :. n) l
  57
  58 myMat :: Int -> Matrix Int
  59 myMat n = matrix n [1..]
  60
  61 -- | Two ways to get the rank (as documentation)
  62 rank :: (Matrix a) -> Int
  63 rank m = arrayRank $ arrayShape m
  64
  65 -----------------------------------------------------------------------
  66 -- | Dimension of a square Matrix
  67 -- How to force use with SquareMatrix ?
  68 type Dim = Int
  69
  70 dim :: (Matrix a) -> Dim
  71 dim m = n
  72   where
  73     Z :. _ :. n = arrayShape m
  74     -- == indexTail (arrayShape m)
  75
  76 -----------------------------------------------------------------------
  77 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  78 proba r mat = zipWith (/) mat (mkSum r mat)
  79
  80 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  81 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
  82
  83 -- divByDiag
  84 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  85 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
  86   where
  87     diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
  88     diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
  89 -----------------------------------------------------------------------
  90
  91 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
  92 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
  93   where
  94     miniMax' = (the $ minimum $ maximum m)
  95
  96 -- | Conditional distance (basic version)
  97 conditional :: Matrix Int -> Matrix Double
  98 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
  99
 100
 101 -- | Conditional distance (advanced version)
 102 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 103 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
 104   where
 105     ie :: Acc (Matrix Double) -> Acc (Matrix Double)
 106     ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
 107     sg :: Acc (Matrix Double) -> Acc (Matrix Double)
 108     sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
 109
 110     n :: Exp Double
 111     n = P.fromIntegral r
 112
 113     r :: Dim
 114     r = dim m
 115
 116     xs :: Acc (Matrix Double) -> Acc (Matrix Double)
 117     xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
 118     ys :: Acc (Matrix Double) -> Acc (Matrix Double)
 119     ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
 120
 121 -----------------------------------------------------------------------
 122
 123 -- | Distributional Distance
 124 distributional :: Matrix Int -> Matrix Double
 125 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
 126   where
 127     n    = dim m
 128
 129     -- filter  m = zipWith (\a b -> max a b) m (transpose m)
 130
 131     ri mat = zipWith (/) mat1 mat2
 132       where
 133         mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
 134         mat2 = mkSum n mat
 135
 136     mi    m'  = zipWith (\a b -> max (log $ a/b) 0)  m'
 137               $ zipWith (/) (crossProduct m') (total m')
 138
 139     total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
 140
 141     crossProduct m''' = zipWith (*) (cross m'''  ) (cross (transpose m'''))
 142     cross mat      = zipWith (-) (mkSum n mat) (mat)
 143
 144 -----------------------------------------------------------------------
 145 -----------------------------------------------------------------------
 146
 147 {-
 148 Metric Specificity and genericity: select terms
 149
 150 let N termes
 151 Ni      : occ de i
 152 Nij     : cooc i et j
 153 Probability to get i given j : P(i|j)=Nij/Nj
 154 Gen(i)  : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity  of i
 155 Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
 156 Inclusion (i) = Gen(i)+Spec(i)
 157 Genericity score = Gen(i)- Spec(i)
 158
 159
 160 References:
 161  *  Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
 162
 163 -}
 164
 165 type InclusionExclusion    = Double
 166 type SpecificityGenericity = Double
 167
 168 data SquareMatrix      = SymetricMatrix | NonSymetricMatrix
 169 type SymetricMatrix    = Matrix
 170 type NonSymetricMatrix = Matrix
 171
 172
 173 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
 174 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
 175   where
 176     run' fun mat = run $ fun $ map fromIntegral $ use mat
 177
 178     -- | Inclusion (i) = Gen(i)+Spec(i)
 179     inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
 180     inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
 181 --
 182     -- | Genericity score = Gen(i)- Spec(i)
 183     specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
 184     specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
 185
 186     -- | Gen(i)  : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity  of i
 187     pV :: Acc (Matrix Double) -> Acc (Vector Double)
 188     pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
 189
 190     -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
 191     pH :: Acc (Matrix Double) -> Acc (Vector Double)
 192     pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
 193
 194     cardN :: Exp Double
 195     cardN = constant (P.fromIntegral (dim m) :: Double)
 196
 197
 198 -- | P(i|j) = Nij /N(jj) Probability to get i given j
 199 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
 200 p_ij m = zipWith (/) m (n_jj m)
 201   where
 202     n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 203     n_jj myMat' = backpermute (shape m)
 204                          (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
 205                                    -> (Z :. j :. j)
 206                                 )
 207                          ) myMat'
 208
 209 -- | P(j|i) = Nij /N(ii) Probability to get i given j
 210 -- to test
 211 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 212 p_ji = transpose . p_ij
 213
 214
 215 -- | Step to ckeck the result in visual/qualitative tests
 216 incExcSpeGen_proba :: Matrix Int -> Matrix Double
 217 incExcSpeGen_proba m = run' pro m
 218   where
 219     run' fun mat = run $ fun $ map fromIntegral $ use mat
 220
 221     pro mat = p_ji mat
 222
 223 {-
 224 -- | Hypothesis to test maybe later (or not)
 225 -- TODO ask accelerate for instances to ease such writtings:
 226 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 227 p_ m = zipWith (/) m (n_ m)
 228   where
 229     n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 230     n_ m = backpermute (shape m)
 231                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 232                                    -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
 233                                 )
 234                          ) m
 235 -}
 236
 237