-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
-import Data.Array.Accelerate
+import Data.Array.Accelerate as A
import Data.Array.Accelerate.Interpreter (run)
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import qualified Gargantext.Prelude as P
------------------------------------------------------------------------
--- * Distributional Distance
-distributional'' :: ( P.Num (Exp a)
- , P.Fractional (Exp a)
- , Elt a
- )
- => Matrix a -> Matrix a
-distributional'' m' = run $ mi_d_mi
+-- | `distributional m` returns the distributional distance between terms each
+-- pair of terms as a matrix. The argument m is the matrix $[n_{ij}]_{i,j}$
+-- where $n_{ij}$ is the coocccurrence between term $i$ and term $j$.
+--
+-- ## Basic example with Matrix of size 3:
+--
+-- >>> theMatrixInt 3
+-- Matrix (Z :. 3 :. 3)
+-- [ 7, 4, 0,
+-- 4, 5, 3,
+-- 0, 3, 4]
+--
+-- >>> distributional $ theMatrixInt 3
+-- Matrix (Z :. 3 :. 3)
+-- [ 1.0, 0.0, 0.9843749999999999,
+-- 0.0, 1.0, 0.0,
+-- 1.0, 0.0, 1.0]
+--
+-- ## Basic example with Matrix of size 4:
+--
+-- >>> theMatrixInt 4
+-- Matrix (Z :. 4 :. 4)
+-- [ 4, 1, 2, 1,
+-- 1, 4, 0, 0,
+-- 2, 0, 3, 3,
+-- 1, 0, 3, 3]
+--
+-- >>> distributional $ theMatrixInt 4
+-- Matrix (Z :. 4 :. 4)
+-- [ 1.0, 0.0, 0.5714285714285715, 0.8421052631578947,
+-- 0.0, 1.0, 1.0, 1.0,
+-- 8.333333333333333e-2, 4.6875e-2, 1.0, 0.25,
+-- 0.3333333333333333, 5.7692307692307696e-2, 1.0, 1.0]
+--
+distributional :: Matrix Int -> Matrix Double
+distributional m' = run result
where
- m = use m'
+ m = map fromIntegral $ use m'
n = dim m'
- d_m = (.*) (matrixIdentity n) m
+ diag_m = diag m
- o_d_m = (#*#) (matrixOne n) d_m
- d_m_o = (#*#) d_m (matrixOne n)
+ d_1 = replicate (constant (Z :. n :. All)) diag_m
+ d_2 = replicate (constant (Z :. All :. n)) diag_m
+ mi = (.*) ((./) m d_1) ((./) m d_2)
- mi = (.*) ((./) m o_d_m) ((./) m o_d_m)
+ -- w = (.-) mi d_mi
+
+ -- The matrix permutations is taken care of below by directly replicating
+ -- the matrix mi, making the matrix w unneccessary and saving one step.
+ w_1 = replicate (constant (Z :. All :. n :. All)) mi
+ w_2 = replicate (constant (Z :. n :. All :. All)) mi
+ w' = zipWith min w_1 w_2
- d_mi = (.*) (matrixIdentity n) mi
- mi_d_mi = (.-) mi d_mi
+ -- The matrix ii = [r_{i,j,k}]_{i,j,k} has r_(i,j,k) = 0 if k = i OR k = j
+ -- and r_(i,j,k) = 1 otherwise (i.e. k /= i AND k /= j).
+ ii = generate (constant (Z :. n :. n :. n))
+ (lift1 (\(Z :. i :. j :. k) -> cond ((&&) ((/=) k i) ((/=) k j)) 1 0))
+ z_1 = sum ((.*) w' ii)
+ z_2 = sum ((.*) w_1 ii)
+ result = termDivNan z_1 z_2
+logDistributional :: Matrix Int -> Matrix Double
+logDistributional m = run
+ $ diagNull n
+ $ matMiniMax
+ $ logDistributional' n m
+ where
+ n = dim m
+
+logDistributional' :: Int -> Matrix Int -> Acc (Matrix Double)
+logDistributional' n m' = result
+ where
+ -- From Matrix Int to Matrix Double, i.e :
+ -- m :: Matrix Int -> Matrix Double
+ m = map fromIntegral $ use m'
+ -- Scalar. Sum of all elements of m.
+ to = the $ sum (flatten m)
+ -- Diagonal matrix with the diagonal of m.
+ d_m = (.*) m (matrixIdentity n)
+ -- Size n vector. s = [s_i]_i
+ s = sum ((.-) m d_m)
+ -- Matrix nxn. Vector s replicated as rows.
+ s_1 = replicate (constant (Z :. All :. n)) s
+ -- Matrix nxn. Vector s replicated as columns.
+ s_2 = replicate (constant (Z :. n :. All)) s
+ -- Matrix nxn. ss = [s_i * s_j]_{i,j}. Outer product of s with itself.
+ ss = (.*) s_1 s_2
+ -- Matrix nxn. mi = [m_{i,j}]_{i,j} where
+ -- m_{i,j} = 0 if n_{i,j} = 0 or i = j,
+ -- m_{i,j} = log(to * n_{i,j} / s_{i,j}) otherwise.
+ mi = (.*) (matrixEye n)
+ (map (lift1 (\x -> cond (x == 0) 0 (log (x * to)))) ((./) m ss))
+ -- Tensor nxnxn. Matrix mi replicated along the 2nd axis.
+ w_1 = replicate (constant (Z :. All :. n :. All)) mi
+ -- Tensor nxnxn. Matrix mi replicated along the 1st axis.
+ w_2 = replicate (constant (Z :. n :. All :. All)) mi
+ -- Tensor nxnxn.
+ w' = zipWith min w_1 w_2
+ -- A predicate that is true when the input (i, j, k) satisfy
+ -- k /= i AND k /= j
+ k_diff_i_and_j = lift1 (\(Z :. i :. j :. k) -> ((&&) ((/=) k i) ((/=) k j)))
+
+ -- Matrix nxn.
+ sumMin = sum (condOrDefault k_diff_i_and_j 0 w')
+
+ -- Matrix nxn. All columns are the same.
+ sumM = sum (condOrDefault k_diff_i_and_j 0 w_1)
+
+ result = termDivNan sumMin sumM
---
-- The distributional metric P(c) of @i@ and @j@ terms is: \[
-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
-- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
--
-distributional :: Matrix Int -> Matrix Double
-distributional m = -- run {- $ matMiniMax -}
+distributional'' :: Matrix Int -> Matrix Double
+distributional'' m = -- run {- $ matMiniMax -}
run $ diagNull n
$ rIJ n
$ filterWith 0 100
-- | Test perfermance with this matrix
-- TODO : add this in a benchmark folder
distriTest :: Int -> Matrix Double
-distriTest n = distributional (theMatrixInt n)
+distriTest n = logDistributional (theMatrixInt n)