2 Module : Gargantext.Core.Methods.Distances.Accelerate.Distributional
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 This module aims at implementig distances of terms context by context is
11 the same referential of corpus.
13 Implementation use Accelerate library which enables GPU and CPU computation
14 See Gargantext.Core.Methods.Graph.Accelerate)
18 {-# LANGUAGE TypeFamilies #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE ViewPatterns #-}
23 module Gargantext.Core.Methods.Distances.Accelerate.Distributional
26 -- import qualified Data.Foldable as P (foldl1)
27 -- import Debug.Trace (trace)
28 import Data.Array.Accelerate
29 import Data.Array.Accelerate.Interpreter (run)
30 import Gargantext.Core.Methods.Matrix.Accelerate.Utils
31 import qualified Gargantext.Prelude as P
34 -- * Metrics of proximity
35 -----------------------------------------------------------------------
36 -- ** Distributional Distance
38 -- | Distributional Distance metric
40 -- Distributional metric is a relative metric which depends on the
41 -- selected list, it represents structural equivalence of mutual information.
43 -- The distributional metric P(c) of @i@ and @j@ terms is: \[
44 -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
45 -- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
48 -- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
50 -- Number of cooccurrences of @i@ and @j@ in the same context of text
53 -- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @n@)
54 -- \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
56 -- Total cooccurrences of term @i@ given a map list of size @m@
57 -- \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
59 -- Total cooccurrences of terms given a map list of size @m@
60 -- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
62 distributional :: Matrix Int -> Matrix Double
63 distributional m = -- run {- $ matMiniMax -}
70 {- from Int to Double -}
72 {- push matrix in Accelerate type -}
75 _ri :: Acc (Matrix Double) -> Acc (Matrix Double)
76 _ri mat = mat1 -- zipWith (/) mat1 mat2
78 mat1 = matSumCol n $ zipWith min (_myMin mat) (_myMin $ filterWith 0 100 $ diagNull n $ transpose mat)
81 _myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
82 _myMin = replicate (constant (Z :. n :. All)) . minimum
87 s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
88 s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
89 $ zipWith (/) (crossProduct n m') (total m')
93 total :: Acc (Matrix Double) -> Acc (Matrix Double)
94 total = replicate (constant (Z :. n :. n)) . sum . sum
99 rIJ :: (Elt a, Ord a, P.Fractional (Exp a), P.Num a)
100 => Dim -> Acc (Matrix a) -> Acc (Matrix a)
101 rIJ n m = matMiniMax $ divide a b
106 -- * For Tests (to be removed)
107 -- | Test perfermance with this matrix
108 -- TODO : add this in a benchmark folder
109 distriTest :: Int -> Matrix Double
110 distriTest n = distributional (theMatrix n)