]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[GRAPH|DIST] distri partial test with n =3
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This module aims at implementig distances of terms context by context is
11 the same referential of corpus.
12
13 Implementation use Accelerate library which enables GPU and CPU computation:
14
15 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
16 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
17 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
18
19 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
20 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
21 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
22
23 -}
24
25 {-# LANGUAGE TypeFamilies #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE ScopedTypeVariables #-}
28 {-# LANGUAGE ViewPatterns #-}
29
30 module Gargantext.Viz.Graph.Distances.Matrice
31 where
32
33 import Debug.Trace (trace)
34 import Data.Array.Accelerate
35 import Data.Array.Accelerate.Interpreter (run)
36 import qualified Gargantext.Prelude as P
37
38
39 -----------------------------------------------------------------------
40 -- | Define a vector
41 --
42 -- >>> vector 3
43 -- Vector (Z :. 3) [0,1,2]
44 vector :: Elt c => Int -> [c] -> (Array (Z :. Int) c)
45 vector n l = fromList (Z :. n) l
46
47 -- | Define a matrix
48 --
49 -- >>> matrix 3 ([1..] :: [Double])
50 -- Matrix (Z :. 3 :. 3)
51 -- [ 1.0, 2.0, 3.0,
52 -- 4.0, 5.0, 6.0,
53 -- 7.0, 8.0, 9.0]
54 matrix :: Elt c => Int -> [c] -> Matrix c
55 matrix n l = fromList (Z :. n :. n) l
56
57 -- | Two ways to get the rank (as documentation)
58 --
59 -- >>> rank (matrix 3 ([1..] :: [Int]))
60 -- 2
61 rank :: (Matrix a) -> Int
62 rank m = arrayRank $ arrayShape m
63
64 -----------------------------------------------------------------------
65 -- | Dimension of a square Matrix
66 -- How to force use with SquareMatrix ?
67 type Dim = Int
68
69 -- | Get Dimension of a square Matrix
70 --
71 -- >>> dim (matrix 3 ([1..] :: [Int]))
72 -- 3
73 dim :: Matrix a -> Dim
74 dim m = n
75 where
76 Z :. _ :. n = arrayShape m
77 -- indexTail (arrayShape m)
78
79 -----------------------------------------------------------------------
80 -- TODO move to Utils
81 runExp :: Elt e => Exp e -> e
82 runExp e = indexArray (run (unit e)) Z
83 -----------------------------------------------------------------------
84
85 -- | Sum of a Matrix by Column
86 --
87 -- >>> run $ matSumCol 3 (use $ matrix 3 [1..])
88 -- Matrix (Z :. 3 :. 3)
89 -- [ 12.0, 15.0, 18.0,
90 -- 12.0, 15.0, 18.0,
91 -- 12.0, 15.0, 18.0]
92 matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
93 matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
94
95 matSumCol' :: Matrix Double -> Matrix Double
96 matSumCol' m = run $ matSumCol n m'
97 where
98 n = dim m
99 m' = use m
100
101
102 -- | Proba computes de probability matrix: all cells divided by thee sum of its column
103 -- if you need get the probability on the lines, just transpose it
104 --
105 -- >>> run $ matProba 3 (use $ matrix 3 [1..])
106 -- Matrix (Z :. 3 :. 3)
107 -- [ 8.333333333333333e-2, 0.13333333333333333, 0.16666666666666666,
108 -- 0.3333333333333333, 0.3333333333333333, 0.3333333333333333,
109 -- 0.5833333333333334, 0.5333333333333333, 0.5]
110 matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
111 matProba r mat = zipWith (/) mat (matSumCol r mat)
112
113 -- | Diagonal of the matrix
114 --
115 -- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
116 -- Vector (Z :. 3) [1,5,9]
117 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
118 diag m = backpermute (indexTail (shape m))
119 (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int))))
120 m
121
122 -- | Divide by the Diagonal of the matrix
123 --
124 -- >>> run $ divByDiag 3 (use $ matrix 3 ([1..] :: [Double]))
125 -- Matrix (Z :. 3 :. 3)
126 -- [ 1.0, 0.4, 0.3333333333333333,
127 -- 4.0, 1.0, 0.6666666666666666,
128 -- 7.0, 1.6, 1.0]
129 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
130 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
131
132 -----------------------------------------------------------------------
133 -- | Filters the matrix with the minimum of maximums
134 --
135 -- >>> run $ matMiniMax $ use $ matrix 3 [1..]
136 -- Matrix (Z :. 3 :. 3)
137 -- [ 0.0, 4.0, 7.0,
138 -- 0.0, 5.0, 8.0,
139 -- 0.0, 6.0, 9.0]
140 matMiniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
141 matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
142 where
143 miniMax' = (the $ minimum $ maximum m)
144
145 -- | Filters the matrix with a constant
146 --
147 -- >>> run $ matFilter 5 $ use $ matrix 3 [1..]
148 -- Matrix (Z :. 3 :. 3)
149 -- [ 0.0, 0.0, 7.0,
150 -- 0.0, 0.0, 8.0,
151 -- 0.0, 6.0, 9.0]
152 filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
153 filter' t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
154
155 -----------------------------------------------------------------------
156 -- * Measures of proximity
157 -----------------------------------------------------------------------
158 -- ** Conditional distance
159
160 -- *** Conditional distance (basic)
161
162 -- | Conditional distance (basic version)
163 --
164 -- 2 main measures are actually implemented in order to compute the
165 -- proximity of two terms: conditional and distributional
166 --
167 -- Conditional measure is an absolute measure which reflects
168 -- interactions of 2 terms in the corpus.
169 measureConditional :: Matrix Int -> Matrix Double
170 --measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
171 measureConditional m = run $ matProba (dim m)
172 $ map fromIntegral
173 $ use m
174
175
176 -- *** Conditional distance (advanced)
177
178 -- | Conditional distance (advanced version)
179 --
180 -- The conditional measure P(i|j) of 2 terms @i@ and @j@, also called
181 -- "confidence" , is the maximum probability between @i@ and @j@ to see
182 -- @i@ in the same context of @j@ knowing @j@.
183 --
184 -- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
185 -- in the corpus and _[n_{ij}\] the number of its occurrences we get:
186 --
187 -- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
188 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
189 conditional' m = ( run $ ie $ map fromIntegral $ use m
190 , run $ sg $ map fromIntegral $ use m
191 )
192 where
193 ie :: Acc (Matrix Double) -> Acc (Matrix Double)
194 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
195 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
196 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
197
198 n :: Exp Double
199 n = P.fromIntegral r
200
201 r :: Dim
202 r = dim m
203
204 xs :: Acc (Matrix Double) -> Acc (Matrix Double)
205 xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
206 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
207 ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
208
209 -----------------------------------------------------------------------
210 -- ** Distributional Distance
211
212 -- | Distributional Distance Measure
213 --
214 -- Distributional measure is a relative measure which depends on the
215 -- selected list, it represents structural equivalence of mutual information.
216 --
217 -- The distributional measure P(c) of @i@ and @j@ terms is: \[
218 -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
219 -- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
220 --
221 -- Mutual information
222 -- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
223 --
224 -- Number of cooccurrences of @i@ and @j@ in the same context of text
225 -- \[C{ij}\]
226 --
227 -- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @n@)
228 -- \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
229 --
230 -- Total cooccurrences of term @i@ given a map list of size @m@
231 -- \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
232 --
233 -- Total cooccurrences of terms given a map list of size @m@
234 -- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
235 --
236 distributional :: Matrix Int -> Matrix Double
237 distributional m = run {- -- $ matMiniMax
238 -- $ ri
239 -- $ myMin
240 -}
241 $ filter' 0
242 $ s_mi
243 $ map fromIntegral
244 {- from Int to Double -}
245 $ use m
246 {- push matrix in Accelerate type -}
247 where
248 -- filter m = zipWith (\a b -> max a b) m (transpose m)
249
250 {-
251 ri :: Acc (Matrix Double) -> Acc (Matrix Double)
252 ri mat = mat1 -- zipWith (/) mat1 mat2
253 where
254 mat1 = matSumCol n $ zipWith min' (myMin mat) (myMin $ transpose mat)
255 mat2 = total mat
256 myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
257 myMin = replicate (constant (Z :. n :. All)) . minimum
258
259 -}
260
261 -- TODO fix NaN
262 -- Quali TEST: OK
263 s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
264 s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
265 $ zipWith (/) (crossProduct n m') (total m')
266 -- crossProduct n m'
267
268
269 total :: Acc (Matrix Double) -> Acc (Matrix Double)
270 total = replicate (constant (Z :. n :. n)) . sum . sum
271
272 n :: Dim
273 n = dim m
274
275 -- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10)
276 identityMatrix :: Num a => Exp Int -> Acc (Matrix a)
277 identityMatrix n =
278 let zeros = fill (index2 n n) 0
279 ones = fill (index1 n) 1
280 in
281 permute const zeros (\(unindex1 -> i) -> index2 i i) ones
282
283
284 eyeMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
285 eyeMatrix n' _m =
286 let ones = fill (index2 n n) 1
287 zeros = fill (index1 n) 0
288 n = constant n'
289 in
290 permute const ones (\(unindex1 -> i) -> index2 i i) zeros
291
292
293 selfMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
294 selfMatrix n' _m =
295 let zeros = fill (index2 n n) 0
296 ones = fill (index2 n n) 1
297 n = constant n'
298 in
299 permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
300 -> -- ifThenElse (i /= j)
301 -- (Z :. i :. j)
302 (Z :. i :. i)
303 )) zeros
304
305 selfMatrix' :: (Elt a, P.Num (Exp a)) => Array DIM2 a -> Matrix a
306 selfMatrix' m' = run $ selfMatrix n m
307 where
308 n = dim m'
309 m = use m'
310
311 -------------------------------------------------
312 diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
313 diagNull n m = zipWith (*) m eye
314 where
315 eye = eyeMatrix n m
316
317
318 -------------------------------------------------
319 crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
320 crossProduct n m = trace (P.show (run m',run m'')) $ zipWith (*) m' m''
321 where
322 m' = cross n m
323 m'' = transpose $ cross n m
324
325 crossT :: Matrix Double -> Matrix Double
326 crossT = run . transpose . use
327
328 crossProduct' :: Matrix Double -> Matrix Double
329 crossProduct' m = run $ crossProduct n m'
330 where
331 n = dim m
332 m' = use m
333
334 runWith :: (Arrays c, Elt a1)
335 => (Dim -> Acc (Matrix a1) -> a2 -> Acc c)
336 -> Matrix a1
337 -> a2
338 -> c
339 runWith f m = run . f (dim m) (use m)
340
341 -- | cross
342 cross :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
343 cross n mat = diagNull n (matSumCol n $ diagNull n mat)
344
345 cross' :: Matrix Double -> Matrix Double
346 cross' mat = run $ cross n mat'
347 where
348 mat' = use mat
349 n = dim mat
350
351
352 -----------------------------------------------------------------------
353 -----------------------------------------------------------------------
354 -- * Specificity and Genericity
355
356 {- | Metric Specificity and genericity: select terms
357
358 - let N termes and occurrences of i \[N{i}\]
359
360 - Cooccurrences of i and j \[N{ij}\]
361 - Probability to get i given j : \[P(i|j)=N{ij}/N{j}\]
362
363 - Genericity of i \[Gen(i) = \frac{\sum_{j \neq i,j} P(i|j)}{N-1}\]
364 - Specificity of j \[Spec(i) = \frac{\sum_{j \neq i,j} P(j|i)}{N-1}\]
365
366 - \[Inclusion (i) = Gen(i) + Spec(i)\)
367 - \[GenericityScore = Gen(i)- Spec(i)\]
368
369 - References: Science mapping with asymmetrical paradigmatic proximity
370 Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
371 on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
372 arXiv:0803.2315 [cs.OH]
373 -}
374 type InclusionExclusion = Double
375 type SpecificityGenericity = Double
376
377 data SquareMatrix = SymetricMatrix | NonSymetricMatrix
378 type SymetricMatrix = Matrix
379 type NonSymetricMatrix = Matrix
380
381
382 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
383 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
384 where
385 run' fun mat = run $ fun $ map fromIntegral $ use mat
386
387 -- | Inclusion (i) = Gen(i)+Spec(i)
388 inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
389 inclusionExclusion mat = zipWith (+) (pV mat) (pV mat)
390
391 -- | Genericity score = Gen(i)- Spec(i)
392 specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
393 specificityGenericity mat = zipWith (+) (pH mat) (pH mat)
394
395 -- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
396 pV :: Acc (Matrix Double) -> Acc (Vector Double)
397 pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
398
399 -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
400 pH :: Acc (Matrix Double) -> Acc (Vector Double)
401 pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
402
403 cardN :: Exp Double
404 cardN = constant (P.fromIntegral (dim m) :: Double)
405
406
407 -- | P(i|j) = Nij /N(jj) Probability to get i given j
408 --p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
409 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
410 p_ij m = zipWith (/) m (n_jj m)
411 where
412 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
413 n_jj myMat' = backpermute (shape m)
414 (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
415 -> (Z :. j :. j)
416 )
417 ) myMat'
418
419 -- | P(j|i) = Nij /N(ii) Probability to get i given j
420 -- to test
421 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
422 p_ji = transpose . p_ij
423
424
425 -- | Step to ckeck the result in visual/qualitative tests
426 incExcSpeGen_proba :: Matrix Int -> Matrix Double
427 incExcSpeGen_proba m = run' pro m
428 where
429 run' fun mat = run $ fun $ map fromIntegral $ use mat
430
431 pro mat = p_ji mat
432
433 {-
434 -- | Hypothesis to test maybe later (or not)
435 -- TODO ask accelerate for instances to ease such writtings:
436 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
437 p_ m = zipWith (/) m (n_ m)
438 where
439 n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
440 n_ m = backpermute (shape m)
441 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
442 -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
443 )
444 ) m
445 -}
446
447 -- * For Tests (to be removed)
448 -- | Test perfermance with this matrix
449 -- TODO : add this in a benchmark folder
450 distriTest :: Int -> Matrix Double
451 distriTest n = distributional (matrix n theMatrix)
452 where
453 theMatrix | (P.==) n 3 = [ 1, 1, 2
454 , 1, 2, 3
455 , 2, 3, 4
456 ]
457 | P.otherwise = [ 1, 1
458 , 1, 2
459 ]
460
461
462 theResult | (P.==) n 2 = let r = 1.6094379124341003 in [ 0, r, r, 0]
463 | P.otherwise = [ 1, 1 ]
464
465
466
467
468 -----------------------------------------------------------------------
469