src/Gargantext/Viz/Graph/Distances/Matrice.hs

   1 {-|
   2 Module      : Gargantext.Graph.Distances.Matrix
   3 Description :
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 This module aims at implementig distances of terms context by context is
  11 the same referential of corpus.
  12
  13 Implementation use Accelerate library which enables GPU and CPU computation:
  14
  15   * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
  16     [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
  17     In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
  18
  19   * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
  20     [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
  21     In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
  22
  23 -}
  24
  25 {-# LANGUAGE TypeFamilies        #-}
  26 {-# LANGUAGE TypeOperators       #-}
  27 {-# LANGUAGE ScopedTypeVariables #-}
  28 {-# LANGUAGE ViewPatterns        #-}
  29
  30 module Gargantext.Viz.Graph.Distances.Matrice
  31   where
  32
  33 import Debug.Trace (trace)
  34 import Data.Array.Accelerate
  35 import Data.Array.Accelerate.Interpreter (run)
  36 import qualified Gargantext.Prelude as P
  37
  38
  39 -----------------------------------------------------------------------
  40 -- | Define a vector
  41 --
  42 -- >>> vector 3
  43 -- Vector (Z :. 3) [0,1,2]
  44 vector :: Elt c => Int -> [c] -> (Array (Z :. Int) c)
  45 vector n l = fromList (Z :. n) l
  46
  47 -- | Define a matrix
  48 --
  49 -- >>> matrix 3 ([1..] :: [Double])
  50 -- Matrix (Z :. 3 :. 3)
  51 --   [ 1.0, 2.0, 3.0,
  52 --     4.0, 5.0, 6.0,
  53 --     7.0, 8.0, 9.0]
  54 matrix :: Elt c => Int -> [c] -> Matrix c
  55 matrix n l = fromList (Z :. n :. n) l
  56
  57 -- | Two ways to get the rank (as documentation)
  58 --
  59 -- >>> rank (matrix 3 ([1..] :: [Int]))
  60 -- 2
  61 rank :: (Matrix a) -> Int
  62 rank m = arrayRank $ arrayShape m
  63
  64 -----------------------------------------------------------------------
  65 -- | Dimension of a square Matrix
  66 -- How to force use with SquareMatrix ?
  67 type Dim = Int
  68
  69 -- | Get Dimension of a square Matrix
  70 --
  71 -- >>> dim (matrix 3 ([1..] :: [Int]))
  72 -- 3
  73 dim :: Matrix a -> Dim
  74 dim m = n
  75   where
  76     Z :. _ :. n = arrayShape m
  77     -- indexTail (arrayShape m)
  78
  79 -----------------------------------------------------------------------
  80 -- TODO move to Utils
  81 runExp :: Elt e => Exp e -> e
  82 runExp e = indexArray (run (unit e)) Z
  83 -----------------------------------------------------------------------
  84
  85 -- | Sum of a Matrix by Column
  86 --
  87 -- >>> run $ matSumCol 3 (use $ matrix 3 [1..])
  88 -- Matrix (Z :. 3 :. 3)
  89 --   [ 12.0, 15.0, 18.0,
  90 --     12.0, 15.0, 18.0,
  91 --     12.0, 15.0, 18.0]
  92 matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
  93 matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
  94
  95 matSumCol' :: Matrix Double -> Matrix Double
  96 matSumCol' m = run $ matSumCol n m'
  97   where
  98     n  = dim m
  99     m' = use m
 100
 101
 102 -- | Proba computes de probability matrix: all cells divided by thee sum of its column
 103 -- if you need get the probability on the lines, just transpose it
 104 --
 105 -- >>> run $ matProba 3 (use $ matrix 3 [1..])
 106 -- Matrix (Z :. 3 :. 3)
 107 --   [ 8.333333333333333e-2, 0.13333333333333333, 0.16666666666666666,
 108 --       0.3333333333333333,  0.3333333333333333,  0.3333333333333333,
 109 --       0.5833333333333334,  0.5333333333333333,                 0.5]
 110 matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 111 matProba r mat = zipWith (/) mat (matSumCol r mat)
 112
 113 -- | Diagonal of the matrix
 114 --
 115 -- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
 116 -- Vector (Z :. 3) [1,5,9]
 117 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
 118 diag m = backpermute (indexTail (shape m))
 119                      (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int))))
 120                      m
 121
 122 -- | Divide by the Diagonal of the matrix
 123 --
 124 -- >>> run $ divByDiag 3 (use $ matrix 3 ([1..] :: [Double]))
 125 -- Matrix (Z :. 3 :. 3)
 126 --   [ 1.0, 0.4, 0.3333333333333333,
 127 --     4.0, 1.0, 0.6666666666666666,
 128 --     7.0, 1.6,                1.0]
 129 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 130 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
 131
 132 -----------------------------------------------------------------------
 133 -- | Filters the matrix with the minimum of maximums
 134 --
 135 -- >>> run $ matMiniMax $ use $ matrix 3 [1..]
 136 -- Matrix (Z :. 3 :. 3)
 137 --   [ 0.0, 4.0, 7.0,
 138 --     0.0, 5.0, 8.0,
 139 --     0.0, 6.0, 9.0]
 140 matMiniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
 141 matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
 142   where
 143     miniMax' = (the $ minimum $ maximum m)
 144
 145 -- | Filters the matrix with a constant
 146 --
 147 -- >>> run $ matFilter 5 $ use $ matrix 3 [1..]
 148 -- Matrix (Z :. 3 :. 3)
 149 --   [ 0.0, 0.0, 7.0,
 150 --     0.0, 0.0, 8.0,
 151 --     0.0, 6.0, 9.0]
 152 filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
 153 filter' t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
 154
 155 -----------------------------------------------------------------------
 156 -- * Measures of proximity
 157 -----------------------------------------------------------------------
 158 -- ** Conditional distance
 159
 160 -- *** Conditional distance (basic)
 161
 162 -- | Conditional distance (basic version)
 163 --
 164 -- 2 main measures are actually implemented in order to compute the
 165 -- proximity of two terms: conditional and distributional
 166 --
 167 -- Conditional measure is an absolute measure which reflects
 168 -- interactions of 2 terms in the corpus.
 169 measureConditional :: Matrix Int -> Matrix Double
 170 --measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
 171 measureConditional m = run $ matProba (dim m)
 172                            $ map fromIntegral
 173                            $ use m
 174
 175
 176 -- *** Conditional distance (advanced)
 177
 178 -- | Conditional distance (advanced version)
 179 --
 180 -- The conditional measure P(i|j) of 2 terms @i@ and @j@, also called
 181 -- "confidence" , is the maximum probability between @i@ and @j@ to see
 182 -- @i@ in the same context of @j@ knowing @j@.
 183 --
 184 -- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
 185 -- in the corpus and _[n_{ij}\] the number of its occurrences we get:
 186 --
 187 -- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
 188 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 189 conditional' m = ( run $ ie $ map fromIntegral $ use m
 190                  , run $ sg $ map fromIntegral $ use m
 191                  )
 192   where
 193     ie :: Acc (Matrix Double) -> Acc (Matrix Double)
 194     ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
 195     sg :: Acc (Matrix Double) -> Acc (Matrix Double)
 196     sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
 197
 198     n :: Exp Double
 199     n = P.fromIntegral r
 200
 201     r :: Dim
 202     r = dim m
 203
 204     xs :: Acc (Matrix Double) -> Acc (Matrix Double)
 205     xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
 206     ys :: Acc (Matrix Double) -> Acc (Matrix Double)
 207     ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
 208
 209 -----------------------------------------------------------------------
 210 -- ** Distributional Distance
 211
 212 -- | Distributional Distance Measure
 213 --
 214 -- Distributional measure is a relative measure which depends on the
 215 -- selected list, it represents structural equivalence of mutual information.
 216 --
 217 -- The distributional measure P(c) of @i@ and @j@ terms is: \[
 218 -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
 219 -- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
 220 --
 221 -- Mutual information
 222 -- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
 223 --
 224 -- Number of cooccurrences of @i@ and @j@ in the same context of text
 225 --                        \[C{ij}\]
 226 --
 227 -- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @n@)
 228 --            \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
 229 --
 230 -- Total cooccurrences of term @i@ given a map list of size @m@
 231 --            \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
 232 --
 233 -- Total cooccurrences of terms given a map list of size @m@
 234 --            \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
 235 --
 236 distributional :: Matrix Int -> Matrix Double
 237 distributional m = run {- -- $ matMiniMax
 238                        -- $ ri
 239                        -- $ myMin
 240                        -}
 241                        $ filter' 0
 242                        $ s_mi
 243                        $ map fromIntegral
 244                           {- from Int to Double -}
 245                        $ use m
 246                           {- push matrix in Accelerate type -}
 247   where
 248     -- filter  m = zipWith (\a b -> max a b) m (transpose m)
 249
 250 {-
 251     ri :: Acc (Matrix Double) -> Acc (Matrix Double)
 252     ri mat = mat1 -- zipWith (/) mat1 mat2
 253       where
 254         mat1 = matSumCol n $ zipWith min'  (myMin mat) (myMin $ transpose mat)
 255         mat2 = total mat
 256     myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
 257     myMin = replicate (constant (Z :. n :. All)) . minimum
 258
 259 -}
 260
 261     -- TODO fix NaN
 262     -- Quali TEST: OK
 263     s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
 264     s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
 265             $ zipWith (/) (crossProduct n m') (total m')
 266             -- crossProduct n m'
 267
 268
 269     total :: Acc (Matrix Double) -> Acc (Matrix Double)
 270     total = replicate (constant (Z :. n :. n)) . sum . sum
 271
 272     n :: Dim
 273     n = dim m
 274
 275 -- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10)
 276 identityMatrix :: Num a => Exp Int -> Acc (Matrix a)
 277 identityMatrix n =
 278         let zeros = fill (index2 n n) 0
 279             ones  = fill (index1 n)   1
 280         in
 281         permute const zeros (\(unindex1 -> i) -> index2 i i) ones
 282
 283
 284 eyeMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
 285 eyeMatrix n' _m =
 286         let ones = fill (index2 n n) 1
 287             zeros  = fill (index1 n)   0
 288             n = constant n'
 289         in
 290         permute const ones (\(unindex1 -> i) -> index2 i i) zeros
 291
 292
 293 selfMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
 294 selfMatrix n' _m =
 295         let zeros = fill (index2 n n) 0
 296             ones  = fill (index2 n n) 1
 297             n = constant n'
 298         in
 299         permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
 300                                                 -> -- ifThenElse (i /= j)
 301                                                      --         (Z :. i :. j)
 302                                                               (Z :. i :. i)
 303                                     )) zeros
 304
 305 selfMatrix' :: (Elt a, P.Num (Exp a)) => Array DIM2 a -> Matrix a
 306 selfMatrix' m' = run $ selfMatrix n m
 307   where
 308     n = dim m'
 309     m = use m'
 310
 311 -------------------------------------------------
 312 diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
 313 diagNull n m = zipWith (*) m eye
 314   where
 315     eye = eyeMatrix n m
 316
 317
 318 -------------------------------------------------
 319 crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 320 crossProduct n m = trace (P.show (run m',run m'')) $ zipWith (*) m' m''
 321   where
 322     m'  = cross n m
 323     m'' = transpose $ cross n m
 324
 325 crossT :: Matrix Double -> Matrix Double
 326 crossT  = run . transpose . use
 327
 328 crossProduct' :: Matrix Double -> Matrix Double
 329 crossProduct' m = run $ crossProduct n m'
 330   where
 331     n  = dim m
 332     m' = use m
 333
 334 runWith :: (Arrays c, Elt a1)
 335         => (Dim -> Acc (Matrix a1) -> a2 -> Acc c)
 336         -> Matrix a1
 337         -> a2
 338         -> c
 339 runWith f m = run . f (dim m) (use m)
 340
 341 -- | cross
 342 cross :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 343 cross n mat = diagNull n (matSumCol n $ diagNull n mat)
 344
 345 cross' :: Matrix Double -> Matrix Double
 346 cross' mat = run $ cross n mat'
 347   where
 348     mat' = use mat
 349     n = dim mat
 350
 351
 352 -----------------------------------------------------------------------
 353 -----------------------------------------------------------------------
 354 -- * Specificity and Genericity
 355
 356 {- | Metric Specificity and genericity: select terms
 357
 358 - let N termes and occurrences of i \[N{i}\]
 359
 360 - Cooccurrences of i and j \[N{ij}\]
 361 - Probability to get i given j : \[P(i|j)=N{ij}/N{j}\]
 362
 363 - Genericity of i  \[Gen(i)  = \frac{\sum_{j \neq i,j} P(i|j)}{N-1}\]
 364 - Specificity of j \[Spec(i) = \frac{\sum_{j \neq i,j} P(j|i)}{N-1}\]
 365
 366 - \[Inclusion (i) = Gen(i) + Spec(i)\)
 367 - \[GenericityScore = Gen(i)- Spec(i)\]
 368
 369 - References: Science mapping with asymmetrical paradigmatic proximity
 370 Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
 371 on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
 372 arXiv:0803.2315 [cs.OH]
 373 -}
 374 type InclusionExclusion    = Double
 375 type SpecificityGenericity = Double
 376
 377 data SquareMatrix      = SymetricMatrix | NonSymetricMatrix
 378 type SymetricMatrix    = Matrix
 379 type NonSymetricMatrix = Matrix
 380
 381
 382 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
 383 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
 384   where
 385     run' fun mat = run $ fun $ map fromIntegral $ use mat
 386
 387     -- | Inclusion (i) = Gen(i)+Spec(i)
 388     inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
 389     inclusionExclusion mat = zipWith (+) (pV mat) (pV mat)
 390
 391     -- | Genericity score = Gen(i)- Spec(i)
 392     specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
 393     specificityGenericity mat = zipWith (+) (pH mat) (pH mat)
 394
 395     -- | Gen(i)  : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity  of i
 396     pV :: Acc (Matrix Double) -> Acc (Vector Double)
 397     pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
 398
 399     -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
 400     pH :: Acc (Matrix Double) -> Acc (Vector Double)
 401     pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
 402
 403     cardN :: Exp Double
 404     cardN = constant (P.fromIntegral (dim m) :: Double)
 405
 406
 407 -- | P(i|j) = Nij /N(jj) Probability to get i given j
 408 --p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
 409 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
 410 p_ij m = zipWith (/) m (n_jj m)
 411   where
 412     n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 413     n_jj myMat' = backpermute (shape m)
 414                          (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
 415                                    -> (Z :. j :. j)
 416                                 )
 417                          ) myMat'
 418
 419 -- | P(j|i) = Nij /N(ii) Probability to get i given j
 420 -- to test
 421 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 422 p_ji = transpose . p_ij
 423
 424
 425 -- | Step to ckeck the result in visual/qualitative tests
 426 incExcSpeGen_proba :: Matrix Int -> Matrix Double
 427 incExcSpeGen_proba m = run' pro m
 428   where
 429     run' fun mat = run $ fun $ map fromIntegral $ use mat
 430
 431     pro mat = p_ji mat
 432
 433 {-
 434 -- | Hypothesis to test maybe later (or not)
 435 -- TODO ask accelerate for instances to ease such writtings:
 436 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
 437 p_ m = zipWith (/) m (n_ m)
 438   where
 439     n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
 440     n_ m = backpermute (shape m)
 441                          (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
 442                                    -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
 443                                 )
 444                          ) m
 445 -}
 446
 447 -- * For Tests (to be removed)
 448 -- | Test perfermance with this matrix
 449 -- TODO : add this in a benchmark folder
 450 distriTest :: Int -> Matrix Double
 451 distriTest n = distributional (matrix n theMatrix)
 452   where
 453     theMatrix | (P.==) n 3 =  [ 1, 1, 2
 454                               , 1, 2, 3
 455                               , 2, 3, 4
 456                               ]
 457               | P.otherwise = [ 1, 1
 458                               , 1, 2
 459                               ]
 460
 461
 462     theResult | (P.==) n 2 = let r = 1.6094379124341003 in [ 0, r, r, 0]
 463               | P.otherwise = [ 1, 1 ]
 464
 465
 466
 467
 468 -----------------------------------------------------------------------
 469