src/Gargantext/Viz/Graph/Distances/Matrice.hs

   1 {-|
   2 Module      : Gargantext.Graph.Distances.Matrix
   3 Description :
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 This module aims at implementig distances of terms context by context is
  11 the same referential of corpus.
  12
  13
  14 Implementation use Accelerate library which enables GPU and CPU computation:
  15
  16   * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
  17     [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
  18     In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
  19
  20   * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
  21     [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
  22     In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
  23
  24 -}
  25
  26 {-# LANGUAGE TypeFamilies        #-}
  27 {-# LANGUAGE TypeOperators       #-}
  28 {-# LANGUAGE ScopedTypeVariables #-}
  29 {-# LANGUAGE ViewPatterns        #-}
  30
  31 module Gargantext.Viz.Graph.Distances.Matrice
  32   where
  33
  34 import Debug.Trace (trace)
  35 import Data.Array.Accelerate
  36 import Data.Array.Accelerate.Interpreter (run)
  37 import qualified Gargantext.Prelude as P
  38
  39
  40 -----------------------------------------------------------------------
  41 -- | Define a vector
  42 --
  43 -- >>> vector 3
  44 -- Vector (Z :. 3) [0,1,2]
  45 vector :: Elt c => Int -> [c] -> (Array (Z :. Int) c)
  46 vector n l = fromList (Z :. n) l
  47
  48 -- | Define a matrix
  49 --
  50 -- >>> matrix 3 ([1..] :: [Double])
  51 -- Matrix (Z :. 3 :. 3)
  52 --   [ 1.0, 2.0, 3.0,
  53 --     4.0, 5.0, 6.0,
  54 --     7.0, 8.0, 9.0]
  55 matrix :: Elt c => Int -> [c] -> Matrix c
  56 matrix n l = fromList (Z :. n :. n) l
  57
  58 -- | Two ways to get the rank (as documentation)
  59 --
  60 -- >>> rank (matrix 3 ([1..] :: [Int]))
  61 -- 2
  62 rank :: (Matrix a) -> Int
  63 rank m = arrayRank $ arrayShape m
  64
  65 -----------------------------------------------------------------------
  66 -- | Dimension of a square Matrix
  67 -- How to force use with SquareMatrix ?
  68 type Dim = Int
  69
  70 -- | Get Dimension of a square Matrix
  71 --
  72 -- >>> dim (matrix 3 ([1..] :: [Int]))
  73 -- 3
  74 dim :: Matrix a -> Dim
  75 dim m = n
  76   where
  77     Z :. _ :. n = arrayShape m
  78     -- indexTail (arrayShape m)
  79
  80 -----------------------------------------------------------------------
  81 -- TODO move to Utils
  82 runExp :: Elt e => Exp e -> e
  83 runExp e = indexArray (run (unit e)) Z
  84 -----------------------------------------------------------------------
  85
  86 -- | Sum of a Matrix by Column
  87 --
  88 -- >>> run $ matSumCol 3 (use $ matrix 3 [1..])
  89 -- Matrix (Z :. 3 :. 3)
  90 --   [ 12.0, 15.0, 18.0,
  91 --     12.0, 15.0, 18.0,
  92 --     12.0, 15.0, 18.0]
  93 matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  94 matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
  95
  96 matSumCol' :: Matrix Double -> Matrix Double
  97 matSumCol' m = run $ matSumCol n m'
  98   where
  99     n  = dim m
 100     m' = use m
 101
 102
 103 -- | Proba computes de probability matrix: all cells divided by thee sum of its column
 104 -- if you need get the probability on the lines, just transpose it
 105 --
 106 -- >>> run $ matProba 3 (use $ matrix 3 [1..])
 107 -- Matrix (Z :. 3 :. 3)
 108 --   [ 8.333333333333333e-2, 0.13333333333333333, 0.16666666666666666,
 109 --       0.3333333333333333,  0.3333333333333333,  0.3333333333333333,
 110 --       0.5833333333333334,  0.5333333333333333,                 0.5]
 111 matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 112 matProba r mat = zipWith (/) mat (matSumCol r mat)
 113
 114 -- | Diagonal of the matrix
 115 --
 116 -- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
 117 -- Vector (Z :. 3) [1,5,9]
 118 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
 119 diag m = backpermute (indexTail (shape m))
 120                      (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int))))
 121                      m
 122
 123 -- | Divide by the Diagonal of the matrix
 124 --
 125 -- >>> run $ divByDiag 3 (use $ matrix 3 ([1..] :: [Double]))
 126 -- Matrix (Z :. 3 :. 3)
 127 --   [ 1.0, 0.4, 0.3333333333333333,
 128 --     4.0, 1.0, 0.6666666666666666,
 129 --     7.0, 1.6,                1.0]
 130 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 131 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
 132
 133 -----------------------------------------------------------------------
 134 -- | Filters the matrix with the minimum of maximums
 135 --
 136 -- >>> run $ matMiniMax $ use $ matrix 3 [1..]
 137 -- Matrix (Z :. 3 :. 3)
 138 --   [ 0.0, 4.0, 7.0,
 139 --     0.0, 5.0, 8.0,
 140 --     0.0, 6.0, 9.0]
 141 matMiniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
 142 matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
 143   where
 144     miniMax' = (the $ minimum $ maximum m)
 145
 146 -- | Filters the matrix with a constant
 147 --
 148 -- >>> run $ matFilter 5 $ use $ matrix 3 [1..]
 149 -- Matrix (Z :. 3 :. 3)
 150 --   [ 0.0, 0.0, 7.0,
 151 --     0.0, 0.0, 8.0,
 152 --     0.0, 6.0, 9.0]
 153 filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
 154 filter' t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
 155
 156 -----------------------------------------------------------------------
 157 -- * Measures of proximity
 158 -----------------------------------------------------------------------
 159 -- ** Conditional distance
 160
 161 -- *** Conditional distance (basic)
 162
 163 -- | Conditional distance (basic version)
 164 --
 165 -- 2 main measures are actually implemented in order to compute the
 166 -- proximity of two terms: conditional and distributional
 167 --
 168 -- Conditional measure is an absolute measure which reflects
 169 -- interactions of 2 terms in the corpus.
 170 measureConditional :: Matrix Int -> Matrix Double
 171 --measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
 172 measureConditional m = run $ matProba (dim m)
 173                            $ map fromIntegral
 174                            $ use m
 175
 176
 177 -- *** Conditional distance (advanced)
 178
 179 -- | Conditional distance (advanced version)
 180 --
 181 -- The conditional measure P(i|j) of 2 terms @i@ and @j@, also called
 182 -- "confidence" , is the maximum probability between @i@ and @j@ to see
 183 -- @i@ in the same context of @j@ knowing @j@.
 184 --
 185 -- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
 186 -- in the corpus and _[n_{ij}\] the number of its occurrences we get:
 187 --
 188 -- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
 189 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 190 conditional' m = ( run $ ie $ map fromIntegral $ use m
 191                  , run $ sg $ map fromIntegral $ use m
 192                  )
 193   where
 194     ie :: Acc (Matrix Double) -> Acc (Matrix Double)
 195     ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
 196     sg :: Acc (Matrix Double) -> Acc (Matrix Double)
 197     sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
 198
 199     n :: Exp Double
 200     n = P.fromIntegral r
 201
 202     r :: Dim
 203     r = dim m
 204
 205     xs :: Acc (Matrix Double) -> Acc (Matrix Double)
 206     xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
 207     ys :: Acc (Matrix Double) -> Acc (Matrix Double)
 208     ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
 209
 210 -----------------------------------------------------------------------
 211 -- ** Distributional Distance
 212
 213 -- | Distributional Distance Measure
 214 --
 215 -- Distributional measure is a relative measure which depends on the
 216 -- selected list, it represents structural equivalence of mutual information.
 217 --
 218 -- The distributional measure P(c) of @i@ and @j@ terms is: \[
 219 -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
 220 -- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
 221 --
 222 -- Mutual information
 223 -- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
 224 --
 225 -- Number of cooccurrences of @i@ and @j@ in the same context of text
 226 --                        \[C{ij}\]
 227 --
 228 -- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @n@)
 229 --            \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
 230 --
 231 -- Total cooccurrences of term @i@ given a map list of size @m@
 232 --            \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
 233 --
 234 -- Total cooccurrences of terms given a map list of size @m@
 235 --            \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
 236 --
 237 distributional :: Matrix Int -> Matrix Double
 238 distributional m = run {- -- $ matMiniMax
 239                        -- $ ri
 240                        -- $ myMin
 241                        -}
 242                        $ filter' 0
 243                        $ s_mi
 244                        $ map fromIntegral
 245                           {- from Int to Double -}
 246                        $ use m
 247                           {- push matrix in Accelerate type -}
 248   where
 249     -- filter  m = zipWith (\a b -> max a b) m (transpose m)
 250
 251 {-
 252     ri :: Acc (Matrix Double) -> Acc (Matrix Double)
 253     ri mat = mat1 -- zipWith (/) mat1 mat2
 254       where
 255         mat1 = matSumCol n $ zipWith min'  (myMin mat) (myMin $ transpose mat)
 256         mat2 = total mat
 257     myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
 258     myMin = replicate (constant (Z :. n :. All)) . minimum
 259
 260 -}
 261
 262     -- TODO fix NaN
 263     -- Quali TEST: OK
 264     s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
 265     s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
 266             $ zipWith (/) (crossProduct n m') (total m')
 267             -- crossProduct n m'
 268
 269
 270     total :: Acc (Matrix Double) -> Acc (Matrix Double)
 271     total = replicate (constant (Z :. n :. n)) . sum . sum
 272
 273     n :: Dim
 274     n = dim m
 275
 276 -- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10)
 277 identityMatrix :: Num a => Exp Int -> Acc (Matrix a)
 278 identityMatrix n =
 279         let zeros = fill (index2 n n) 0
 280             ones  = fill (index1 n)   1
 281         in
 282         permute const zeros (\(unindex1 -> i) -> index2 i i) ones
 283
 284
 285 eyeMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
 286 eyeMatrix n' _m =
 287         let ones = fill (index2 n n) 1
 288             zeros  = fill (index1 n)   0
 289             n = constant n'
 290         in
 291         permute const ones (\(unindex1 -> i) -> index2 i i) zeros
 292
 293
 294 selfMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
 295 selfMatrix n' _m =
 296         let zeros = fill (index2 n n) 0
 297             ones  = fill (index2 n n) 1
 298             n = constant n'
 299         in
 300         permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
 301                                                 -> -- ifThenElse (i /= j)
 302                                                      --         (Z :. i :. j)
 303                                                               (Z :. i :. i)
 304                                     )) zeros
 305
 306 selfMatrix' :: (Elt a, P.Num (Exp a)) => Array DIM2 a -> Matrix a
 307 selfMatrix' m' = run $ selfMatrix n m
 308   where
 309     n = dim m'
 310     m = use m'
 311
 312 -------------------------------------------------
 313 diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
 314 diagNull n m = zipWith (*) m eye
 315   where
 316     eye = eyeMatrix n m
 317
 318
 319 -------------------------------------------------
 320 crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 321 crossProduct n m = trace (P.show (run m',run m'')) $ zipWith (*) m' m''
 322   where
 323     m'  = cross n m
 324     m'' = cross n (transpose m)
 325
 326 crossT :: Matrix Double -> Matrix Double
 327 crossT  = run . transpose . use
 328
 329 crossProduct' :: Matrix Double -> Matrix Double
 330 crossProduct' m = run $ crossProduct n m'
 331   where
 332     n  = dim m
 333     m' = use m
 334
 335 runWith :: (Arrays c, Elt a1)
 336         => (Dim -> Acc (Matrix a1) -> a2 -> Acc c)
 337         -> Matrix a1
 338         -> a2
 339         -> c
 340 runWith f m = run . f (dim m) (use m)
 341
 342 -- | cross
 343 cross :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 344 cross n mat = diagNull n (matSumCol n $ diagNull n mat)
 345
 346 cross' :: Matrix Double -> Matrix Double
 347 cross' mat = run $ cross n mat'
 348   where
 349     mat' = use mat
 350     n = dim mat
 351
 352
 353 -----------------------------------------------------------------------
 354 -----------------------------------------------------------------------
 355 -- * Specificity and Genericity
 356
 357 {- | Metric Specificity and genericity: select terms
 358
 359 - let N termes and occurrences of i \[N{i}\]
 360
 361 - Cooccurrences of i and j \[N{ij}\]
 362 - Probability to get i given j : \[P(i|j)=N{ij}/N{j}\]
 363
 364 - Genericity of i  \[Gen(i)  = \frac{\sum_{j \neq i,j} P(i|j)}{N-1}\]
 365 - Specificity of j \[Spec(i) = \frac{\sum_{j \neq i,j} P(j|i)}{N-1}\]
 366
 367 - \[Inclusion (i) = Gen(i) + Spec(i)\)
 368 - \[GenericityScore = Gen(i)- Spec(i)\]
 369
 370 - References: Science mapping with asymmetrical paradigmatic proximity
 371 Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
 372 on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
 373 arXiv:0803.2315 [cs.OH]
 374 -}
 375 type InclusionExclusion    = Double
 376 type SpecificityGenericity = Double
 377
 378 data SquareMatrix      = SymetricMatrix | NonSymetricMatrix
 379 type SymetricMatrix    = Matrix
 380 type NonSymetricMatrix = Matrix
 381
 382
 383 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
 384 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
 385   where
 386     run' fun mat = run $ fun $ map fromIntegral $ use mat
 387
 388     -- | Inclusion (i) = Gen(i)+Spec(i)
 389     inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
 390     inclusionExclusion mat = zipWith (+) (pV mat) (pV mat)
 391
 392     -- | Genericity score = Gen(i)- Spec(i)
 393     specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
 394     specificityGenericity mat = zipWith (+) (pH mat) (pH mat)
 395
 396     -- | Gen(i)  : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity  of i
 397     pV :: Acc (Matrix Double) -> Acc (Vector Double)
 398     pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
 399
 400     -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
 401     pH :: Acc (Matrix Double) -> Acc (Vector Double)
 402     pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
 403
 404     cardN :: Exp Double
 405     cardN = constant (P.fromIntegral (dim m) :: Double)
 406
 407
 408 -- | P(i|j) = Nij /N(jj) Probability to get i given j
 409 --p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
 410 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
 411 p_ij m = zipWith (/) m (n_jj m)
 412   where
 413     n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 414     n_jj myMat' = backpermute (shape m)
 415                          (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
 416                                    -> (Z :. j :. j)
 417                                 )
 418                          ) myMat'
 419
 420 -- | P(j|i) = Nij /N(ii) Probability to get i given j
 421 -- to test
 422 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 423 p_ji = transpose . p_ij
 424
 425
 426 -- | Step to ckeck the result in visual/qualitative tests
 427 incExcSpeGen_proba :: Matrix Int -> Matrix Double
 428 incExcSpeGen_proba m = run' pro m
 429   where
 430     run' fun mat = run $ fun $ map fromIntegral $ use mat
 431
 432     pro mat = p_ji mat
 433
 434 {-
 435 -- | Hypothesis to test maybe later (or not)
 436 -- TODO ask accelerate for instances to ease such writtings:
 437 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 438 p_ m = zipWith (/) m (n_ m)
 439   where
 440     n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 441     n_ m = backpermute (shape m)
 442                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 443                                    -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
 444                                 )
 445                          ) m
 446 -}
 447
 448 -- * For Tests (to be removed)
 449 -- | Test perfermance with this matrix
 450 -- TODO : add this in a benchmark folder
 451 distriTest :: Matrix Double
 452 distriTest = distributional $ matrix 100 [1..]
 453 -----------------------------------------------------------------------
 454