src/Gargantext/Viz/Graph/Distances/Matrice.hs

   1 {-|
   2 Module      : Gargantext.Graph.Distances.Matrix
   3 Description :
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 Motivation and definition of the @Conditional@ distance.
  11
  12 Implementation use Accelerate library :
  13   * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
  14     [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
  15     In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
  16
  17   * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
  18     [Optimising Purely Functional GPU Programs][MCKL13].
  19     In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
  20
  21   * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
  22     [Embedding Foreign Code][CMCK14].
  23     In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
  24
  25   * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
  26     [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
  27     In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
  28
  29 -}
  30
  31 {-# LANGUAGE NoImplicitPrelude   #-}
  32 {-# LANGUAGE FlexibleContexts    #-}
  33 {-# LANGUAGE TypeFamilies        #-}
  34 {-# LANGUAGE TypeOperators       #-}
  35 {-# LANGUAGE ScopedTypeVariables #-}
  36
  37
  38 module Gargantext.Viz.Graph.Distances.Matrice
  39   where
  40
  41 import Data.Array.Accelerate
  42 import Data.Array.Accelerate.Interpreter (run)
  43 import Data.Array.Accelerate.Smart
  44 import Data.Array.Accelerate.Type
  45 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
  46
  47 import Data.Maybe (Maybe(Just))
  48 import qualified Gargantext.Prelude as P
  49 import qualified Data.Array.Accelerate.Array.Representation     as Repr
  50
  51 import Gargantext.Text.Metrics.Count
  52
  53
  54 -----------------------------------------------------------------------
  55 -- Test perf.
  56 distriTest = distributional $ myMat 100
  57 -----------------------------------------------------------------------
  58
  59 vector :: Int -> (Array (Z :. Int) Int)
  60 vector n = fromList (Z :. n) [0..n]
  61
  62 matrix :: Elt c => Int -> [c] -> Matrix c
  63 matrix n l = fromList (Z :. n :. n) l
  64
  65 myMat :: Int -> Matrix Int
  66 myMat n = matrix n [1..]
  67
  68 -- | Two ways to get the rank (as documentation)
  69 rank :: (Matrix a) -> Int
  70 rank m = arrayRank $ arrayShape m
  71
  72 -----------------------------------------------------------------------
  73 -- | Dimension of a square Matrix
  74 -- How to force use with SquareMatrix ?
  75 type Dim = Int
  76
  77 dim :: (Matrix a) -> Dim
  78 dim m = n
  79   where
  80     Z :. _ :. n = arrayShape m
  81     -- == indexTail (arrayShape m)
  82
  83 -----------------------------------------------------------------------
  84 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  85 proba r mat = zipWith (/) mat (mkSum r mat)
  86
  87 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  88 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
  89
  90 -- divByDiag
  91 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  92 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
  93   where
  94     diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
  95     diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
  96 -----------------------------------------------------------------------
  97
  98 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
  99 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
 100   where
 101     miniMax' = (the $ minimum $ maximum m)
 102
 103 -- | Conditional distance (basic version)
 104 conditional :: Matrix Int -> Matrix Double
 105 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
 106
 107
 108 -- | Conditional distance (advanced version)
 109 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 110 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
 111   where
 112     ie :: Acc (Matrix Double) -> Acc (Matrix Double)
 113     ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
 114     sg :: Acc (Matrix Double) -> Acc (Matrix Double)
 115     sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
 116
 117     n :: Exp Double
 118     n = P.fromIntegral r
 119
 120     r :: Dim
 121     r = dim m
 122
 123     xs :: Acc (Matrix Double) -> Acc (Matrix Double)
 124     xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
 125     ys :: Acc (Matrix Double) -> Acc (Matrix Double)
 126     ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
 127
 128 -----------------------------------------------------------------------
 129
 130 -- | Distributional Distance
 131 distributional :: Matrix Int -> Matrix Double
 132 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
 133   where
 134     n    = dim m
 135
 136     filter  m = zipWith (\a b -> max a b) m (transpose m)
 137
 138     ri mat = zipWith (/) mat1 mat2
 139       where
 140         mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
 141         mat2 = mkSum n mat
 142
 143     mi    m'  = zipWith (\a b -> max (log $ a/b) 0)  m'
 144               $ zipWith (/) (crossProduct m') (total m')
 145
 146     total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
 147
 148     crossProduct m = zipWith (*) (cross m  ) (cross (transpose m))
 149     cross mat      = zipWith (-) (mkSum n mat) (mat)
 150
 151 -----------------------------------------------------------------------
 152 -----------------------------------------------------------------------
 153
 154 {-
 155 Metric Specificity and genericity: select terms
 156
 157 let N termes
 158 Ni      : occ de i
 159 Nij     : cooc i et j
 160 Probability to get i given j : P(i|j)=Nij/Nj
 161 Gen(i)  : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity  of i
 162 Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
 163 Inclusion (i) = Gen(i)+Spec(i)
 164 Genericity score = Gen(i)- Spec(i)
 165
 166
 167 References:
 168  *  Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
 169
 170 -}
 171
 172 type InclusionExclusion    = Double
 173 type SpecificityGenericity = Double
 174
 175 data SquareMatrix      = SymetricMatrix | NonSymetricMatrix
 176 type SymetricMatrix    = Matrix
 177 type NonSymetricMatrix = Matrix
 178
 179
 180 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
 181 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
 182   where
 183     run' fun mat = run $ fun $ map fromIntegral $ use mat
 184
 185     -- | Inclusion (i) = Gen(i)+Spec(i)
 186     inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
 187     inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
 188 --
 189     -- | Genericity score = Gen(i)- Spec(i)
 190     specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
 191     specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
 192
 193     -- | Gen(i)  : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity  of i
 194     pV :: Acc (Matrix Double) -> Acc (Vector Double)
 195     pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
 196
 197     -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
 198     pH :: Acc (Matrix Double) -> Acc (Vector Double)
 199     pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
 200
 201     cardN :: Exp Double
 202     cardN = constant (P.fromIntegral (dim m) :: Double)
 203
 204
 205 -- | P(i|j) = Nij /N(jj) Probability to get i given j
 206 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
 207 p_ij m = zipWith (/) m (n_jj m)
 208   where
 209     n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 210     n_jj m = backpermute (shape m)
 211                          (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
 212                                    -> (Z :. j :. j)
 213                                 )
 214                          ) m
 215
 216 -- | P(j|i) = Nij /N(ii) Probability to get i given j
 217 -- to test
 218 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 219 p_ji = transpose . p_ij
 220
 221
 222 -- | Step to ckeck the result in visual/qualitative tests
 223 incExcSpeGen_proba :: Matrix Int -> Matrix Double
 224 incExcSpeGen_proba m = run' pro m
 225   where
 226     run' fun mat = run $ fun $ map fromIntegral $ use mat
 227
 228     pro mat = p_ji mat
 229
 230 {-
 231 -- | Hypothesis to test maybe later (or not)
 232 -- TODO ask accelerate for instances to ease such writtings:
 233 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 234 p_ m = zipWith (/) m (n_ m)
 235   where
 236     n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 237     n_ m = backpermute (shape m)
 238                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 239                                    -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
 240                                 )
 241                          ) m
 242 -}
 243
 244