src/Gargantext/Viz/Graph/Distances/Matrice.hs

   1 {-|
   2 Module      : Gargantext.Graph.Distances.Matrix
   3 Description :
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 This module aims at implementig distances of terms context by context is
  11 the same referential of corpus.
  12
  13
  14 Implementation use Accelerate library which enables GPU and CPU computation:
  15
  16   * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
  17     [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
  18     In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
  19
  20   * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
  21     [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
  22     In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
  23
  24 -}
  25
  26 {-# LANGUAGE TypeFamilies        #-}
  27 {-# LANGUAGE TypeOperators       #-}
  28 {-# LANGUAGE ScopedTypeVariables #-}
  29 {-# LANGUAGE ViewPatterns        #-}
  30
  31 module Gargantext.Viz.Graph.Distances.Matrice
  32   where
  33
  34 import Debug.Trace (trace)
  35 import Data.Array.Accelerate
  36 import Data.Array.Accelerate.Interpreter (run)
  37 import qualified Gargantext.Prelude as P
  38
  39
  40 -----------------------------------------------------------------------
  41 -- | Define a vector
  42 --
  43 -- >>> vector 3
  44 -- Vector (Z :. 3) [0,1,2]
  45 vector :: Elt c => Int -> [c] -> (Array (Z :. Int) c)
  46 vector n l = fromList (Z :. n) l
  47
  48 -- | Define a matrix
  49 --
  50 -- >>> matrix 3 ([1..] :: [Double])
  51 -- Matrix (Z :. 3 :. 3)
  52 --   [ 1.0, 2.0, 3.0,
  53 --     4.0, 5.0, 6.0,
  54 --     7.0, 8.0, 9.0]
  55 matrix :: Elt c => Int -> [c] -> Matrix c
  56 matrix n l = fromList (Z :. n :. n) l
  57
  58 -- | Two ways to get the rank (as documentation)
  59 --
  60 -- >>> rank (matrix 3 ([1..] :: [Int]))
  61 -- 2
  62 rank :: (Matrix a) -> Int
  63 rank m = arrayRank $ arrayShape m
  64
  65 -----------------------------------------------------------------------
  66 -- | Dimension of a square Matrix
  67 -- How to force use with SquareMatrix ?
  68 type Dim = Int
  69
  70 -- | Get Dimension of a square Matrix
  71 --
  72 -- >>> dim (matrix 3 ([1..] :: [Int]))
  73 -- 3
  74 dim :: Matrix a -> Dim
  75 dim m = n
  76   where
  77     Z :. _ :. n = arrayShape m
  78     -- indexTail (arrayShape m)
  79
  80 -----------------------------------------------------------------------
  81 -- TODO move to Utils
  82 runExp :: Elt e => Exp e -> e
  83 runExp e = indexArray (run (unit e)) Z
  84 -----------------------------------------------------------------------
  85
  86 -- | Sum of a Matrix by Column
  87 --
  88 -- >>> run $ matSumCol 3 (use $ matrix 3 [1..])
  89 -- Matrix (Z :. 3 :. 3)
  90 --   [ 12.0, 15.0, 18.0,
  91 --     12.0, 15.0, 18.0,
  92 --     12.0, 15.0, 18.0]
  93 matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  94 matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
  95
  96 matSumCol' :: Matrix Double -> Matrix Double
  97 matSumCol' m = run $ matSumCol n m'
  98   where
  99     n  = dim m
 100     m' = use m
 101
 102
 103 -- | Proba computes de probability matrix: all cells divided by thee sum of its column
 104 -- if you need get the probability on the lines, just transpose it
 105 --
 106 -- >>> run $ matProba 3 (use $ matrix 3 [1..])
 107 -- Matrix (Z :. 3 :. 3)
 108 --   [ 8.333333333333333e-2, 0.13333333333333333, 0.16666666666666666,
 109 --       0.3333333333333333,  0.3333333333333333,  0.3333333333333333,
 110 --       0.5833333333333334,  0.5333333333333333,                 0.5]
 111 matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 112 matProba r mat = zipWith (/) mat (matSumCol r mat)
 113
 114 -- | Diagonal of the matrix
 115 --
 116 -- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
 117 -- Vector (Z :. 3) [1,5,9]
 118 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
 119 diag m = backpermute (indexTail (shape m))
 120                      (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int))))
 121                      m
 122
 123 -- | Divide by the Diagonal of the matrix
 124 --
 125 -- >>> run $ divByDiag 3 (use $ matrix 3 ([1..] :: [Double]))
 126 -- Matrix (Z :. 3 :. 3)
 127 --   [ 1.0, 0.4, 0.3333333333333333,
 128 --     4.0, 1.0, 0.6666666666666666,
 129 --     7.0, 1.6,                1.0]
 130 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 131 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
 132
 133 -----------------------------------------------------------------------
 134 -- | Filters the matrix with the minimum of maximums
 135 --
 136 -- >>> run $ matMiniMax $ use $ matrix 3 [1..]
 137 -- Matrix (Z :. 3 :. 3)
 138 --   [ 0.0, 4.0, 7.0,
 139 --     0.0, 5.0, 8.0,
 140 --     0.0, 6.0, 9.0]
 141 matMiniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
 142 matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
 143   where
 144     miniMax' = (the $ minimum $ maximum m)
 145
 146 -- | Filters the matrix with a constant
 147 --
 148 -- >>> run $ matFilter 5 $ use $ matrix 3 [1..]
 149 -- Matrix (Z :. 3 :. 3)
 150 --   [ 0.0, 0.0, 7.0,
 151 --     0.0, 0.0, 8.0,
 152 --     0.0, 6.0, 9.0]
 153 filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
 154 filter' t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
 155
 156 -----------------------------------------------------------------------
 157 -- * Measures of proximity
 158 -----------------------------------------------------------------------
 159 -- ** Conditional distance
 160
 161 -- *** Conditional distance (basic)
 162
 163 -- | Conditional distance (basic version)
 164 --
 165 -- 2 main measures are actually implemented in order to compute the
 166 -- proximity of two terms: conditional and distributional
 167 --
 168 -- Conditional measure is an absolute measure which reflects
 169 -- interactions of 2 terms in the corpus.
 170 measureConditional :: Matrix Int -> Matrix Double
 171 --measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
 172 measureConditional m = run $ matProba (dim m)
 173                            $ map fromIntegral
 174                            $ use m
 175
 176
 177 -- *** Conditional distance (advanced)
 178
 179 -- | Conditional distance (advanced version)
 180 --
 181 -- The conditional measure P(i|j) of 2 terms @i@ and @j@, also called
 182 -- "confidence" , is the maximum probability between @i@ and @j@ to see
 183 -- @i@ in the same context of @j@ knowing @j@.
 184 --
 185 -- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
 186 -- in the corpus and _[n_{ij}\] the number of its occurrences we get:
 187 --
 188 -- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
 189 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 190 conditional' m = ( run $ ie $ map fromIntegral $ use m
 191                  , run $ sg $ map fromIntegral $ use m
 192                  )
 193   where
 194     ie :: Acc (Matrix Double) -> Acc (Matrix Double)
 195     ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
 196     sg :: Acc (Matrix Double) -> Acc (Matrix Double)
 197     sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
 198
 199     n :: Exp Double
 200     n = P.fromIntegral r
 201
 202     r :: Dim
 203     r = dim m
 204
 205     xs :: Acc (Matrix Double) -> Acc (Matrix Double)
 206     xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
 207     ys :: Acc (Matrix Double) -> Acc (Matrix Double)
 208     ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
 209
 210 -----------------------------------------------------------------------
 211 -- ** Distributional Distance
 212
 213 -- | Distributional Distance Measure
 214 --
 215 -- Distributional measure is a relative measure which depends on the
 216 -- selected list, it represents structural equivalence of mutual information.
 217 --
 218 -- The distributional measure P(c) of @i@ and @j@ terms is: \[
 219 -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
 220 -- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
 221 --
 222 -- Mutual information
 223 -- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
 224 --
 225 -- Number of cooccurrences of @i@ and @j@ in the same context of text
 226 --                        \[C{ij}\]
 227 --
 228 -- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @n@)
 229 --            \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
 230 --
 231 -- Total cooccurrences of term @i@ given a map list of size @m@
 232 --            \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
 233 --
 234 -- Total cooccurrences of terms given a map list of size @m@
 235 --            \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
 236 --
 237 distributional :: Matrix Int -> Matrix Double
 238 distributional m = run -- $ matMiniMax
 239                        -- $ ri
 240                        -- $ myMin
 241                        $ filter' 0
 242                        $ s_mi
 243                        $ map fromIntegral  -- ^ from Int to Double
 244                        $ use m             -- ^ push matrix in Accelerate type
 245   where
 246     -- filter  m = zipWith (\a b -> max a b) m (transpose m)
 247
 248 {-
 249     ri :: Acc (Matrix Double) -> Acc (Matrix Double)
 250     ri mat = mat1 -- zipWith (/) mat1 mat2
 251       where
 252         mat1 = matSumCol n $ zipWith min'  (myMin mat) (myMin $ transpose mat)
 253         mat2 = total mat
 254     myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
 255     myMin = replicate (constant (Z :. n :. All)) . minimum
 256
 257 -}
 258
 259     -- TODO fix NaN
 260     -- Quali TEST: OK
 261     s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
 262     s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
 263             $ zipWith (/) (crossProduct n m') (total m')
 264             -- crossProduct n m'
 265
 266
 267     total :: Acc (Matrix Double) -> Acc (Matrix Double)
 268     total = replicate (constant (Z :. n :. n)) . sum . sum
 269
 270     n :: Dim
 271     n = dim m
 272
 273 -- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10)
 274 identityMatrix :: Num a => Exp Int -> Acc (Matrix a)
 275 identityMatrix n =
 276         let zeros = fill (index2 n n) 0
 277             ones  = fill (index1 n)   1
 278         in
 279         permute const zeros (\(unindex1 -> i) -> index2 i i) ones
 280
 281
 282 eyeMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
 283 eyeMatrix n' _m =
 284         let ones = fill (index2 n n) 1
 285             zeros  = fill (index1 n)   0
 286             n = constant n'
 287         in
 288         permute const ones (\(unindex1 -> i) -> index2 i i) zeros
 289
 290
 291 selfMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
 292 selfMatrix n' _m =
 293         let zeros = fill (index2 n n) 0
 294             ones  = fill (index2 n n) 1
 295             n = constant n'
 296         in
 297         permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
 298                                                 -> -- ifThenElse (i /= j)
 299                                                      --         (Z :. i :. j)
 300                                                               (Z :. i :. i)
 301                                     )) zeros
 302
 303 selfMatrix' :: (Elt a, P.Num (Exp a)) => Array DIM2 a -> Matrix a
 304 selfMatrix' m' = run $ selfMatrix n m
 305   where
 306     n = dim m'
 307     m = use m'
 308
 309 -------------------------------------------------
 310 diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
 311 diagNull n m = zipWith (*) m eye
 312   where
 313     eye = eyeMatrix n m
 314
 315
 316 -------------------------------------------------
 317 crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 318 crossProduct n m = trace (P.show (run m',run m'')) $ zipWith (*) m' m''
 319   where
 320     m'  = cross n m
 321     m'' = cross n (transpose m)
 322
 323 crossT :: Matrix Double -> Matrix Double
 324 crossT  = run . transpose . use
 325
 326 crossProduct' :: Matrix Double -> Matrix Double
 327 crossProduct' m = run $ crossProduct n m'
 328   where
 329     n  = dim m
 330     m' = use m
 331
 332 runWith :: (Arrays c, Elt a1)
 333         => (Dim -> Acc (Matrix a1) -> a2 -> Acc c)
 334         -> Matrix a1
 335         -> a2
 336         -> c
 337 runWith f m = run . f (dim m) (use m)
 338
 339 -- | cross
 340 cross :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 341 cross n mat = diagNull n (matSumCol n $ diagNull n mat)
 342
 343 cross' :: Matrix Double -> Matrix Double
 344 cross' mat = run $ cross n mat'
 345   where
 346     mat' = use mat
 347     n = dim mat
 348
 349
 350 -----------------------------------------------------------------------
 351 -----------------------------------------------------------------------
 352 -- * Specificity and Genericity
 353
 354 {- | Metric Specificity and genericity: select terms
 355
 356 - let N termes and occurrences of i \[N{i}\]
 357
 358 - Cooccurrences of i and j \[N{ij}\]
 359 - Probability to get i given j : \[P(i|j)=N{ij}/N{j}\]
 360
 361 - Genericity of i  \[Gen(i)  = \frac{\sum_{j \neq i,j} P(i|j)}{N-1}\]
 362 - Specificity of j \[Spec(i) = \frac{\sum_{j \neq i,j} P(j|i)}{N-1}\]
 363
 364 - \[Inclusion (i) = Gen(i) + Spec(i)\)
 365 - \[GenericityScore = Gen(i)- Spec(i)\]
 366
 367 - References: Science mapping with asymmetrical paradigmatic proximity
 368 Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
 369 on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
 370 arXiv:0803.2315 [cs.OH]
 371 -}
 372 type InclusionExclusion    = Double
 373 type SpecificityGenericity = Double
 374
 375 data SquareMatrix      = SymetricMatrix | NonSymetricMatrix
 376 type SymetricMatrix    = Matrix
 377 type NonSymetricMatrix = Matrix
 378
 379
 380 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
 381 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
 382   where
 383     run' fun mat = run $ fun $ map fromIntegral $ use mat
 384
 385     -- | Inclusion (i) = Gen(i)+Spec(i)
 386     inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
 387     inclusionExclusion mat = zipWith (+) (pV mat) (pV mat)
 388
 389     -- | Genericity score = Gen(i)- Spec(i)
 390     specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
 391     specificityGenericity mat = zipWith (+) (pH mat) (pH mat)
 392
 393     -- | Gen(i)  : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity  of i
 394     pV :: Acc (Matrix Double) -> Acc (Vector Double)
 395     pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
 396
 397     -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
 398     pH :: Acc (Matrix Double) -> Acc (Vector Double)
 399     pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
 400
 401     cardN :: Exp Double
 402     cardN = constant (P.fromIntegral (dim m) :: Double)
 403
 404
 405 -- | P(i|j) = Nij /N(jj) Probability to get i given j
 406 --p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
 407 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
 408 p_ij m = zipWith (/) m (n_jj m)
 409   where
 410     n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 411     n_jj myMat' = backpermute (shape m)
 412                          (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
 413                                    -> (Z :. j :. j)
 414                                 )
 415                          ) myMat'
 416
 417 -- | P(j|i) = Nij /N(ii) Probability to get i given j
 418 -- to test
 419 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 420 p_ji = transpose . p_ij
 421
 422
 423 -- | Step to ckeck the result in visual/qualitative tests
 424 incExcSpeGen_proba :: Matrix Int -> Matrix Double
 425 incExcSpeGen_proba m = run' pro m
 426   where
 427     run' fun mat = run $ fun $ map fromIntegral $ use mat
 428
 429     pro mat = p_ji mat
 430
 431 {-
 432 -- | Hypothesis to test maybe later (or not)
 433 -- TODO ask accelerate for instances to ease such writtings:
 434 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 435 p_ m = zipWith (/) m (n_ m)
 436   where
 437     n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 438     n_ m = backpermute (shape m)
 439                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 440                                    -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
 441                                 )
 442                          ) m
 443 -}
 444
 445 -- * For Tests (to be removed)
 446 -- | Test perfermance with this matrix
 447 -- TODO : add this in a benchmark folder
 448 distriTest :: Matrix Double
 449 distriTest = distributional $ matrix 100 [1..]
 450 -----------------------------------------------------------------------
 451