]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[DIST] nullOf fun
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This module aims at implementig distances of terms context by context is
11 the same referential of corpus.
12
13 Implementation use Accelerate library which enables GPU and CPU computation:
14
15 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
16 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
17 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
18
19 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
20 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
21 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
22
23 -}
24
25 {-# LANGUAGE TypeFamilies #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE ScopedTypeVariables #-}
28 {-# LANGUAGE ViewPatterns #-}
29
30 module Gargantext.Viz.Graph.Distances.Matrice
31 where
32
33 import Debug.Trace (trace)
34 import Data.Array.Accelerate
35 import Data.Array.Accelerate.Interpreter (run)
36 import qualified Gargantext.Prelude as P
37
38
39 -----------------------------------------------------------------------
40 -- | Define a vector
41 --
42 -- >>> vector 3
43 -- Vector (Z :. 3) [0,1,2]
44 vector :: Elt c => Int -> [c] -> (Array (Z :. Int) c)
45 vector n l = fromList (Z :. n) l
46
47 -- | Define a matrix
48 --
49 -- >>> matrix 3 ([1..] :: [Double])
50 -- Matrix (Z :. 3 :. 3)
51 -- [ 1.0, 2.0, 3.0,
52 -- 4.0, 5.0, 6.0,
53 -- 7.0, 8.0, 9.0]
54 matrix :: Elt c => Int -> [c] -> Matrix c
55 matrix n l = fromList (Z :. n :. n) l
56
57 -- | Two ways to get the rank (as documentation)
58 --
59 -- >>> rank (matrix 3 ([1..] :: [Int]))
60 -- 2
61 rank :: (Matrix a) -> Int
62 rank m = arrayRank $ arrayShape m
63
64 -----------------------------------------------------------------------
65 -- | Dimension of a square Matrix
66 -- How to force use with SquareMatrix ?
67 type Dim = Int
68
69 -- | Get Dimension of a square Matrix
70 --
71 -- >>> dim (matrix 3 ([1..] :: [Int]))
72 -- 3
73 dim :: Matrix a -> Dim
74 dim m = n
75 where
76 Z :. _ :. n = arrayShape m
77 -- indexTail (arrayShape m)
78
79 -----------------------------------------------------------------------
80 -- TODO move to Utils
81 runExp :: Elt e => Exp e -> e
82 runExp e = indexArray (run (unit e)) Z
83 -----------------------------------------------------------------------
84
85 -- | Sum of a Matrix by Column
86 --
87 -- >>> run $ matSumCol 3 (use $ matrix 3 [1..])
88 -- Matrix (Z :. 3 :. 3)
89 -- [ 12.0, 15.0, 18.0,
90 -- 12.0, 15.0, 18.0,
91 -- 12.0, 15.0, 18.0]
92 matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
93 matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
94
95 matSumCol' :: Matrix Double -> Matrix Double
96 matSumCol' m = run $ matSumCol n m'
97 where
98 n = dim m
99 m' = use m
100
101
102 -- | Proba computes de probability matrix: all cells divided by thee sum of its column
103 -- if you need get the probability on the lines, just transpose it
104 --
105 -- >>> run $ matProba 3 (use $ matrix 3 [1..])
106 -- Matrix (Z :. 3 :. 3)
107 -- [ 8.333333333333333e-2, 0.13333333333333333, 0.16666666666666666,
108 -- 0.3333333333333333, 0.3333333333333333, 0.3333333333333333,
109 -- 0.5833333333333334, 0.5333333333333333, 0.5]
110 matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
111 matProba r mat = zipWith (/) mat (matSumCol r mat)
112
113 -- | Diagonal of the matrix
114 --
115 -- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
116 -- Vector (Z :. 3) [1,5,9]
117 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
118 diag m = backpermute (indexTail (shape m))
119 (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int))))
120 m
121
122 -- | Divide by the Diagonal of the matrix
123 --
124 -- >>> run $ divByDiag 3 (use $ matrix 3 ([1..] :: [Double]))
125 -- Matrix (Z :. 3 :. 3)
126 -- [ 1.0, 0.4, 0.3333333333333333,
127 -- 4.0, 1.0, 0.6666666666666666,
128 -- 7.0, 1.6, 1.0]
129 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
130 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
131
132 -----------------------------------------------------------------------
133 -- | Filters the matrix with the minimum of maximums
134 --
135 -- >>> run $ matMiniMax $ use $ matrix 3 [1..]
136 -- Matrix (Z :. 3 :. 3)
137 -- [ 0.0, 4.0, 7.0,
138 -- 0.0, 5.0, 8.0,
139 -- 0.0, 6.0, 9.0]
140 matMiniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
141 matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
142 where
143 miniMax' = (the $ minimum $ maximum m)
144
145
146
147 -- | Filters the matrix with a constant
148 --
149 -- >>> run $ matFilter 5 $ use $ matrix 3 [1..]
150 -- Matrix (Z :. 3 :. 3)
151 -- [ 0.0, 0.0, 7.0,
152 -- 0.0, 0.0, 8.0,
153 -- 0.0, 6.0, 9.0]
154 filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
155 filter' t m = filterWith t 0 m
156
157 filterWith :: Double -> Double -> Acc (Matrix Double) -> Acc (Matrix Double)
158 filterWith t v m = map (\x -> ifThenElse (x > (constant t)) x (constant v)) (transpose m)
159
160
161
162 -----------------------------------------------------------------------
163 -- * Measures of proximity
164 -----------------------------------------------------------------------
165 -- ** Conditional distance
166
167 -- *** Conditional distance (basic)
168
169 -- | Conditional distance (basic version)
170 --
171 -- 2 main measures are actually implemented in order to compute the
172 -- proximity of two terms: conditional and distributional
173 --
174 -- Conditional measure is an absolute measure which reflects
175 -- interactions of 2 terms in the corpus.
176 measureConditional :: Matrix Int -> Matrix Double
177 --measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
178 measureConditional m = run $ matProba (dim m)
179 $ map fromIntegral
180 $ use m
181
182
183 -- *** Conditional distance (advanced)
184
185 -- | Conditional distance (advanced version)
186 --
187 -- The conditional measure P(i|j) of 2 terms @i@ and @j@, also called
188 -- "confidence" , is the maximum probability between @i@ and @j@ to see
189 -- @i@ in the same context of @j@ knowing @j@.
190 --
191 -- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
192 -- in the corpus and _[n_{ij}\] the number of its occurrences we get:
193 --
194 -- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
195 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
196 conditional' m = ( run $ ie $ map fromIntegral $ use m
197 , run $ sg $ map fromIntegral $ use m
198 )
199 where
200 ie :: Acc (Matrix Double) -> Acc (Matrix Double)
201 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
202 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
203 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
204
205 n :: Exp Double
206 n = P.fromIntegral r
207
208 r :: Dim
209 r = dim m
210
211 xs :: Acc (Matrix Double) -> Acc (Matrix Double)
212 xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
213 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
214 ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
215
216 -----------------------------------------------------------------------
217 -- ** Distributional Distance
218
219 -- | Distributional Distance Measure
220 --
221 -- Distributional measure is a relative measure which depends on the
222 -- selected list, it represents structural equivalence of mutual information.
223 --
224 -- The distributional measure P(c) of @i@ and @j@ terms is: \[
225 -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
226 -- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
227 --
228 -- Mutual information
229 -- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
230 --
231 -- Number of cooccurrences of @i@ and @j@ in the same context of text
232 -- \[C{ij}\]
233 --
234 -- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @n@)
235 -- \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
236 --
237 -- Total cooccurrences of term @i@ given a map list of size @m@
238 -- \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
239 --
240 -- Total cooccurrences of terms given a map list of size @m@
241 -- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
242 --
243 distributional :: Matrix Int -> Matrix Double
244 distributional m = run -- $ matMiniMax
245 -- $ diagNull n
246 $ ri
247 $ filterWith 0 100
248 $ filter' 0
249 $ s_mi
250 $ map fromIntegral
251 {- from Int to Double -}
252 $ use m
253 {- push matrix in Accelerate type -}
254 where
255
256 ri :: Acc (Matrix Double) -> Acc (Matrix Double)
257 ri mat = mat1 -- zipWith (/) mat1 mat2
258 where
259 mat1 = matSumCol n $ zipWith min (myMin mat) (myMin $ filterWith 0 100 $ diagNull n $ transpose mat)
260 mat2 = total mat
261
262 myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
263 myMin = replicate (constant (Z :. n :. All)) . minimum
264
265
266 -- TODO fix NaN
267 -- Quali TEST: OK
268 s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
269 s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
270 $ zipWith (/) (crossProduct n m') (total m')
271 -- crossProduct n m'
272
273
274 total :: Acc (Matrix Double) -> Acc (Matrix Double)
275 total = replicate (constant (Z :. n :. n)) . sum . sum
276
277 n :: Dim
278 n = dim m
279
280 -- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10)
281 identityMatrix :: Num a => Exp Int -> Acc (Matrix a)
282 identityMatrix n =
283 let zeros = fill (index2 n n) 0
284 ones = fill (index1 n) 1
285 in
286 permute const zeros (\(unindex1 -> i) -> index2 i i) ones
287
288
289 eyeMatrix :: Num a => Dim -> Acc (Matrix a)
290 eyeMatrix n' =
291 let ones = fill (index2 n n) 1
292 zeros = fill (index1 n) 0
293 n = constant n'
294 in
295 permute const ones (\(unindex1 -> i) -> index2 i i) zeros
296
297 -- | TODO use Lenses
298 data Direction = MatCol (Exp Int) | MatRow (Exp Int) | Diag
299
300 nullOf :: Num a => Dim -> Direction -> Acc (Matrix a)
301 nullOf n' dir =
302 let ones = fill (index2 n n) 1
303 zeros = fill (index2 n n) 0
304 n = constant n'
305 in
306 permute const ones -- (\(unindex2 -> i) -> let Exp (x,y) = i in index2 x y)
307
308 ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
309 -> case dir of
310 MatCol m -> (Z :. i :. m)
311 MatRow m -> (Z :. m :. i)
312 Diag -> (Z :. i :. i)
313
314 ))
315 zeros
316
317 nullOfWithDiag :: Num a => Dim -> Direction -> Acc (Matrix a)
318 nullOfWithDiag n dir = zipWith (*) (nullOf n dir) (nullOf n Diag)
319
320
321
322 {- | WIP fun with indexes
323 selfMatrix :: Num a => Dim -> Acc (Matrix a)
324 selfMatrix n' =
325 let zeros = fill (index2 n n) 0
326 ones = fill (index2 n n) 1
327 n = constant n'
328 in
329 permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
330 -> -- ifThenElse (i /= j)
331 -- (Z :. i :. j)
332 (Z :. i :. i)
333 )) zeros
334
335 selfMatrix' :: (Elt a, P.Num (Exp a)) => Array DIM2 a -> Matrix a
336 selfMatrix' m' = run $ selfMatrix n
337 where
338 n = dim m'
339 m = use m'
340 -}
341 -------------------------------------------------
342 diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
343 diagNull n m = zipWith (*) m eye
344 where
345 eye = eyeMatrix n
346
347 -------------------------------------------------
348 crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
349 crossProduct n m = {-trace (P.show (run m',run m'')) $-} zipWith (*) m' m''
350 where
351 m' = cross n m
352 m'' = transpose $ cross n m
353
354
355 crossT :: Matrix Double -> Matrix Double
356 crossT = run . transpose . use
357
358 crossProduct' :: Matrix Double -> Matrix Double
359 crossProduct' m = run $ crossProduct n m'
360 where
361 n = dim m
362 m' = use m
363
364 runWith :: (Arrays c, Elt a1)
365 => (Dim -> Acc (Matrix a1) -> a2 -> Acc c)
366 -> Matrix a1
367 -> a2
368 -> c
369 runWith f m = run . f (dim m) (use m)
370
371 -- | cross
372 cross :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
373 cross n mat = diagNull n (matSumCol n $ diagNull n mat)
374
375 cross' :: Matrix Double -> Matrix Double
376 cross' mat = run $ cross n mat'
377 where
378 mat' = use mat
379 n = dim mat
380
381
382 -----------------------------------------------------------------------
383 -----------------------------------------------------------------------
384 -- * Specificity and Genericity
385
386 {- | Metric Specificity and genericity: select terms
387
388 - let N termes and occurrences of i \[N{i}\]
389
390 - Cooccurrences of i and j \[N{ij}\]
391 - Probability to get i given j : \[P(i|j)=N{ij}/N{j}\]
392
393 - Genericity of i \[Gen(i) = \frac{\sum_{j \neq i,j} P(i|j)}{N-1}\]
394 - Specificity of j \[Spec(i) = \frac{\sum_{j \neq i,j} P(j|i)}{N-1}\]
395
396 - \[Inclusion (i) = Gen(i) + Spec(i)\)
397 - \[GenericityScore = Gen(i)- Spec(i)\]
398
399 - References: Science mapping with asymmetrical paradigmatic proximity
400 Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
401 on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
402 arXiv:0803.2315 [cs.OH]
403 -}
404 type InclusionExclusion = Double
405 type SpecificityGenericity = Double
406
407 data SquareMatrix = SymetricMatrix | NonSymetricMatrix
408 type SymetricMatrix = Matrix
409 type NonSymetricMatrix = Matrix
410
411
412 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
413 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
414 where
415 run' fun mat = run $ fun $ map fromIntegral $ use mat
416
417 -- | Inclusion (i) = Gen(i)+Spec(i)
418 inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
419 inclusionExclusion mat = zipWith (+) (pV mat) (pV mat)
420
421 -- | Genericity score = Gen(i)- Spec(i)
422 specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
423 specificityGenericity mat = zipWith (+) (pH mat) (pH mat)
424
425 -- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
426 pV :: Acc (Matrix Double) -> Acc (Vector Double)
427 pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
428
429 -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
430 pH :: Acc (Matrix Double) -> Acc (Vector Double)
431 pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
432
433 cardN :: Exp Double
434 cardN = constant (P.fromIntegral (dim m) :: Double)
435
436
437 -- | P(i|j) = Nij /N(jj) Probability to get i given j
438 --p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
439 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
440 p_ij m = zipWith (/) m (n_jj m)
441 where
442 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
443 n_jj myMat' = backpermute (shape m)
444 (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
445 -> (Z :. j :. j)
446 )
447 ) myMat'
448
449 -- | P(j|i) = Nij /N(ii) Probability to get i given j
450 -- to test
451 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
452 p_ji = transpose . p_ij
453
454
455 -- | Step to ckeck the result in visual/qualitative tests
456 incExcSpeGen_proba :: Matrix Int -> Matrix Double
457 incExcSpeGen_proba m = run' pro m
458 where
459 run' fun mat = run $ fun $ map fromIntegral $ use mat
460
461 pro mat = p_ji mat
462
463 {-
464 -- | Hypothesis to test maybe later (or not)
465 -- TODO ask accelerate for instances to ease such writtings:
466 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
467 p_ m = zipWith (/) m (n_ m)
468 where
469 n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
470 n_ m = backpermute (shape m)
471 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
472 -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
473 )
474 ) m
475 -}
476
477 -- * For Tests (to be removed)
478 -- | Test perfermance with this matrix
479 -- TODO : add this in a benchmark folder
480 distriTest :: Int -> Matrix Double
481 distriTest n = distributional (matrix n theMatrix)
482 where
483 theMatrix | (P.==) n 2 = [ 1, 1
484 , 1, 2
485 ]
486
487 | (P.==) n 3 = [ 1, 1, 2
488 , 1, 2, 3
489 , 2, 3, 4
490 ]
491 | (P.==) n 4 = [ 1, 1, 2, 3
492 , 1, 2, 3, 4
493 , 2, 3, 4, 5
494 , 3, 4, 5, 6
495 ]
496 | P.otherwise = P.undefined
497
498 theResult | (P.==) n 2 = let r = 1.6094379124341003 in [ 0, r, r, 0]
499 | P.otherwise = [ 1, 1 ]
500
501
502
503
504 -----------------------------------------------------------------------
505