src/Gargantext/Viz/Graph/Distances/Matrice.hs

   1 {-|
   2 Module      : Gargantext.Graph.Distances.Matrix
   3 Description :
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 Motivation and definition of the @Conditional@ distance.
  11
  12 Implementation use Accelerate library :
  13   * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
  14     [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
  15     In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
  16
  17   * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
  18     [Optimising Purely Functional GPU Programs][MCKL13].
  19     In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
  20
  21   * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
  22     [Embedding Foreign Code][CMCK14].
  23     In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
  24
  25   * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
  26     [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
  27     In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
  28
  29 -}
  30
  31 {-# LANGUAGE NoImplicitPrelude   #-}
  32 {-# LANGUAGE FlexibleContexts    #-}
  33 {-# LANGUAGE TypeFamilies        #-}
  34 {-# LANGUAGE TypeOperators       #-}
  35 {-# LANGUAGE ScopedTypeVariables #-}
  36
  37
  38 module Gargantext.Viz.Graph.Distances.Matrice
  39   where
  40
  41 import Data.Array.Accelerate
  42 import Data.Array.Accelerate.Interpreter (run)
  43 import Data.Array.Accelerate.Smart
  44 import Data.Array.Accelerate.Type
  45 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
  46
  47 import Data.Maybe (Maybe(Just))
  48 import qualified Gargantext.Prelude as P
  49 import qualified Data.Array.Accelerate.Array.Representation     as Repr
  50
  51 import Gargantext.Text.Metrics.Count
  52
  53
  54 -----------------------------------------------------------------------
  55 -- Test perf.
  56 distriTest = distributional $ myMat 100
  57 -----------------------------------------------------------------------
  58
  59 vector :: Int -> (Array (Z :. Int) Int)
  60 vector n = fromList (Z :. n) [0..n]
  61
  62 matrix :: Elt c => Int -> [c] -> Matrix c
  63 matrix n l = fromList (Z :. n :. n) l
  64
  65 myMat :: Int -> Matrix Int
  66 myMat n = matrix n [1..]
  67
  68 -- | Two ways to get the rank (as documentation)
  69 rank :: (Matrix a) -> Int
  70 rank m = arrayRank $ arrayShape m
  71
  72 -----------------------------------------------------------------------
  73 -- | Dimension of a square Matrix
  74 -- How to force use with SquareMatrix ?
  75 type Dim = Int
  76
  77 dim :: (Matrix a) -> Dim
  78 dim m = n
  79   where
  80     Z :. _ :. n = arrayShape m
  81     -- == indexTail (arrayShape m)
  82
  83 -----------------------------------------------------------------------
  84 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  85 proba r mat = zipWith (/) mat (mkSum r mat)
  86
  87 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  88 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
  89
  90 -- divByDiag
  91 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  92 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
  93   where
  94     diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
  95     diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
  96 -----------------------------------------------------------------------
  97
  98 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
  99 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
 100   where
 101     miniMax' = (the $ minimum $ maximum m)
 102
 103 -- | Conditional distance (basic version)
 104 conditional :: Matrix Int -> Matrix Double
 105 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
 106
 107
 108 -- | Conditional distance (advanced version)
 109 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 110 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
 111   where
 112     ie :: Acc (Matrix Double) -> Acc (Matrix Double)
 113     ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
 114     sg :: Acc (Matrix Double) -> Acc (Matrix Double)
 115     sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
 116
 117     n :: Exp Double
 118     n = P.fromIntegral r
 119
 120     r :: Dim
 121     r = dim m
 122
 123     xs :: Acc (Matrix Double) -> Acc (Matrix Double)
 124     xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
 125     ys :: Acc (Matrix Double) -> Acc (Matrix Double)
 126     ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
 127
 128 -----------------------------------------------------------------------
 129
 130 -- | Distributional Distance
 131 distributional :: Matrix Int -> Matrix Double
 132 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
 133   where
 134     n    = dim m
 135
 136     filter  m = zipWith (\a b -> max a b) m (transpose m)
 137
 138     ri mat = zipWith (/) mat1 mat2
 139       where
 140         mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
 141         mat2 = mkSum n mat
 142
 143     mi    m'  = zipWith (\a b -> max (log $ a/b) 0)  m'
 144               $ zipWith (/) (crossProduct m') (total m')
 145
 146     total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
 147
 148     crossProduct m = zipWith (*) (cross m  ) (cross (transpose m))
 149     cross mat      = zipWith (-) (mkSum n mat) (mat)
 150
 151
 152
 153 -----------------------------------------------------------------------
 154 -----------------------------------------------------------------------
 155
 156 {-
 157 Metric Specificity and genericity: select terms
 158
 159 let N termes
 160 Ni      : occ de i
 161 Nij     : cooc i et j
 162 Probability to get i given j : P(i|j)=Nij/Nj
 163 Gen(i)  : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity  of i
 164 Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
 165 Inclusion (i) = Gen(i)+Spec(i)
 166 Genericity score = Gen(i)- Spec(i)
 167
 168
 169 References:
 170  *  Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
 171
 172 -}
 173
 174 type InclusionExclusion    = Double
 175 type SpecificityGenericity = Double
 176
 177 data SquareMatrix      = SymetricMatrix | NonSymetricMatrix
 178 type SymetricMatrix    = Matrix
 179 type NonSymetricMatrix = Matrix
 180
 181
 182 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
 183 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
 184   where
 185     run' fun mat = run $ fun $ map fromIntegral $ use mat
 186
 187     -- | Inclusion (i) = Gen(i)+Spec(i)
 188     inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
 189     inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
 190 --
 191     -- | Genericity score = Gen(i)- Spec(i)
 192     specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
 193     specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
 194
 195     -- | Gen(i)  : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity  of i
 196     pV :: Acc (Matrix Double) -> Acc (Vector Double)
 197     pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
 198
 199     -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
 200     pH :: Acc (Matrix Double) -> Acc (Vector Double)
 201     pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
 202
 203     cardN :: Exp Double
 204     cardN = constant (P.fromIntegral (dim m) :: Double)
 205
 206
 207 -- | P(i|j) = Nij /N(jj) Probability to get i given j
 208 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
 209 p_ij m = zipWith (/) m (n_jj m)
 210   where
 211     n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 212     n_jj m = backpermute (shape m)
 213                          (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
 214                                    -> (Z :. j :. j)
 215                                 )
 216                          ) m
 217
 218 -- | P(j|i) = Nij /N(ii) Probability to get i given j
 219 -- to test
 220 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 221 p_ji = transpose . p_ij
 222
 223
 224 -- | Step to ckeck the result in visual/qualitative tests
 225 incExcSpeGen_proba :: Matrix Int -> Matrix Double
 226 incExcSpeGen_proba m = run' pro m
 227   where
 228     run' fun mat = run $ fun $ map fromIntegral $ use mat
 229
 230     pro mat = p_ji mat
 231
 232 {-
 233 -- | Hypothesis to test maybe later (or not)
 234 -- TODO ask accelerate for instances to ease such writtings:
 235 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 236 p_ m = zipWith (/) m (n_ m)
 237   where
 238     n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 239     n_ m = backpermute (shape m)
 240                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 241                                    -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
 242                                 )
 243                          ) m
 244 -}
 245
 246