]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[GRAPH] Distances work with Accelerate (WIP)
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This module aims at implementig distances of terms context by context is
11 the same referential of corpus.
12
13
14 Implementation use Accelerate library which enables GPU and CPU computation:
15
16 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
17 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
18 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
19
20 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
21 [Optimising Purely Functional GPU Programs][MCKL13].
22 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
23
24 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
25 [Embedding Foreign Code][CMCK14].
26 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
27
28 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
29 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
30 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
31
32 -}
33
34 {-# LANGUAGE TypeFamilies #-}
35 {-# LANGUAGE TypeOperators #-}
36 {-# LANGUAGE ScopedTypeVariables #-}
37 {-# LANGUAGE ViewPatterns #-}
38
39 module Gargantext.Viz.Graph.Distances.Matrice
40 where
41
42 import Debug.Trace (trace)
43 import Data.Array.Accelerate
44 import Data.Array.Accelerate.Interpreter (run)
45 import qualified Gargantext.Prelude as P
46
47
48 -----------------------------------------------------------------------
49 -- | Define a vector
50 --
51 -- >>> vector 3
52 -- Vector (Z :. 3) [0,1,2]
53 vector :: Int -> (Array (Z :. Int) Int)
54 vector n = fromList (Z :. n) [0..n]
55
56 -- | Define a matrix
57 --
58 -- >>> matrix 3 ([1..] :: [Double])
59 -- Matrix (Z :. 3 :. 3)
60 -- [ 1.0, 2.0, 3.0,
61 -- 4.0, 5.0, 6.0,
62 -- 7.0, 8.0, 9.0]
63 matrix :: Elt c => Int -> [c] -> Matrix c
64 matrix n l = fromList (Z :. n :. n) l
65
66 -- | Two ways to get the rank (as documentation)
67 --
68 -- >>> rank (matrix 3 ([1..] :: [Int]))
69 -- 2
70 rank :: (Matrix a) -> Int
71 rank m = arrayRank $ arrayShape m
72
73 -----------------------------------------------------------------------
74 -- | Dimension of a square Matrix
75 -- How to force use with SquareMatrix ?
76 type Dim = Int
77
78 -- | Get Dimension of a square Matrix
79 --
80 -- >>> dim (matrix 3 ([1..] :: [Int]))
81 -- 3
82 dim :: Matrix a -> Dim
83 dim m = n
84 where
85 Z :. _ :. n = arrayShape m
86 -- indexTail (arrayShape m)
87
88 -----------------------------------------------------------------------
89 -- TODO move to Utils
90 runExp :: Elt e => Exp e -> e
91 runExp e = indexArray (run (unit e)) Z
92 -----------------------------------------------------------------------
93
94 -- | Sum of a Matrix by Column
95 --
96 -- >>> run $ matSumCol 3 (use $ matrix 3 [1..])
97 -- Matrix (Z :. 3 :. 3)
98 -- [ 12.0, 15.0, 18.0,
99 -- 12.0, 15.0, 18.0,
100 -- 12.0, 15.0, 18.0]
101 matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
102 matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
103
104 matSumCol' :: Matrix Double -> Matrix Double
105 matSumCol' m = run $ matSumCol n m'
106 where
107 n = dim m
108 m' = use m
109
110
111 -- | Proba computes de probability matrix: all cells divided by thee sum of its column
112 -- if you need get the probability on the lines, just transpose it
113 --
114 -- >>> run $ matProba 3 (use $ matrix 3 [1..])
115 -- Matrix (Z :. 3 :. 3)
116 -- [ 8.333333333333333e-2, 0.13333333333333333, 0.16666666666666666,
117 -- 0.3333333333333333, 0.3333333333333333, 0.3333333333333333,
118 -- 0.5833333333333334, 0.5333333333333333, 0.5]
119 matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
120 matProba r mat = zipWith (/) mat (matSumCol r mat)
121
122 -- | Diagonal of the matrix
123 --
124 -- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
125 -- Vector (Z :. 3) [1,5,9]
126 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
127 diag m = backpermute (indexTail (shape m))
128 (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int))))
129 m
130
131 -- | Divide by the Diagonal of the matrix
132 --
133 -- >>> run $ divByDiag 3 (use $ matrix 3 ([1..] :: [Double]))
134 -- Matrix (Z :. 3 :. 3)
135 -- [ 1.0, 0.4, 0.3333333333333333,
136 -- 4.0, 1.0, 0.6666666666666666,
137 -- 7.0, 1.6, 1.0]
138 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
139 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
140
141 -----------------------------------------------------------------------
142 -- | Filters the matrix with the minimum of maximums
143 --
144 -- >>> run $ matMiniMax $ use $ matrix 3 [1..]
145 -- Matrix (Z :. 3 :. 3)
146 -- [ 0.0, 4.0, 7.0,
147 -- 0.0, 5.0, 8.0,
148 -- 0.0, 6.0, 9.0]
149 matMiniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
150 matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
151 where
152 miniMax' = (the $ minimum $ maximum m)
153
154 -- | Filters the matrix with a constant
155 --
156 -- >>> run $ matFilter 5 $ use $ matrix 3 [1..]
157 -- Matrix (Z :. 3 :. 3)
158 -- [ 0.0, 0.0, 7.0,
159 -- 0.0, 0.0, 8.0,
160 -- 0.0, 6.0, 9.0]
161 filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
162 filter' t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
163
164 -----------------------------------------------------------------------
165 -- * Measures of proximity
166 -----------------------------------------------------------------------
167 -- ** Conditional distance
168
169 -- *** Conditional distance (basic)
170
171 -- | Conditional distance (basic version)
172 --
173 -- 2 main measures are actually implemented in order to compute the
174 -- proximity of two terms: conditional and distributional
175 --
176 -- Conditional measure is an absolute measure which reflects
177 -- interactions of 2 terms in the corpus.
178 measureConditional :: Matrix Int -> Matrix Double
179 --measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
180 measureConditional m = run $ matProba (dim m)
181 $ map fromIntegral
182 $ use m
183
184
185 -- *** Conditional distance (advanced)
186
187 -- | Conditional distance (advanced version)
188 --
189 -- The conditional measure P(i|j) of 2 terms @i@ and @j@, also called
190 -- "confidence" , is the maximum probability between @i@ and @j@ to see
191 -- @i@ in the same context of @j@ knowing @j@.
192 --
193 -- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
194 -- in the corpus and _[n_{ij}\] the number of its occurrences we get:
195 --
196 -- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
197 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
198 conditional' m = ( run $ ie $ map fromIntegral $ use m
199 , run $ sg $ map fromIntegral $ use m
200 )
201 where
202 ie :: Acc (Matrix Double) -> Acc (Matrix Double)
203 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
204 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
205 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
206
207 n :: Exp Double
208 n = P.fromIntegral r
209
210 r :: Dim
211 r = dim m
212
213 xs :: Acc (Matrix Double) -> Acc (Matrix Double)
214 xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
215 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
216 ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
217
218 -----------------------------------------------------------------------
219 -- ** Distributional Distance
220
221 -- | Distributional Distance Measure
222 --
223 -- Distributional measure is a relative measure which depends on the
224 -- selected list, it represents structural equivalence of mutual information.
225 --
226 -- The distributional measure P(c) of @i@ and @j@ terms is: \[
227 -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
228 -- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
229 --
230 -- Mutual information
231 -- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
232 --
233 -- Number of cooccurrences of @i@ and @j@ in the same context of text
234 -- \[C{ij}\]
235 --
236 -- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @n@)
237 -- \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
238 --
239 -- Total cooccurrences of term @i@ given a map list of size @m@
240 -- \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
241 --
242 -- Total cooccurrences of terms given a map list of size @m@
243 -- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
244 --
245 distributional :: Matrix Int -> Matrix Double
246 distributional m = run -- $ matMiniMax
247 -- $ ri
248 -- $ myMin
249 $ filter' 0
250 $ s_mi
251 -- $ diag2null
252 $ map fromIntegral -- ^ from Int to Double
253 $ use m -- ^ push matrix in Accelerate type
254 where
255 -- filter m = zipWith (\a b -> max a b) m (transpose m)
256
257 ri :: Acc (Matrix Double) -> Acc (Matrix Double)
258 ri mat = mat1 -- zipWith (/) mat1 mat2
259 where
260 mat1 = matSumCol n $ zipWith min' (myMin mat) (myMin $ transpose mat)
261 mat2 = total mat
262
263 s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
264 s_mi m' = zipWith (\a b -> log (a/b)) m'
265 $ zipWith (/) (crossProduct n m') (total m')
266
267 total :: Acc (Matrix Double) -> Acc (Matrix Double)
268 total = replicate (constant (Z :. n :. n)) . sum . sum
269
270 min' x y
271 | runExp (x > y && x /= 0) = x
272 | P.otherwise = y
273
274 myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
275 myMin = replicate (constant (Z :. n :. All)) . minimum
276
277 n :: Dim
278 n = dim m
279
280 -- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10)
281 identityMatrix :: Num a => Exp Int -> Acc (Matrix a)
282 identityMatrix n =
283 let zeros = fill (index2 n n) 0
284 ones = fill (index1 n) 1
285 in
286 permute const zeros (\(unindex1 -> i) -> index2 i i) ones
287
288
289 eyeMatrix :: Num a => (Matrix a) -> Acc (Matrix a)
290 eyeMatrix m =
291 let zeros = fill (index2 n n) 1
292 ones = fill (index1 n) 0
293 n = constant $ dim m
294 in
295 permute const zeros (\(unindex1 -> i) -> index2 i i) ones
296
297 diag2null :: Num a => (Matrix a) -> Acc (Matrix a)
298 diag2null m' = zipWith (*) m eye
299 where
300 m = use m'
301 eye = eyeMatrix m'
302
303
304
305 crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
306 crossProduct n m = trace (P.show (run m',run m'')) $ zipWith (*) m' m''
307 where
308 m' = cross n m
309 m'' = cross n (transpose m)
310
311 crossT :: Matrix Double -> Matrix Double
312 crossT = run . transpose . use
313
314 crossProduct' :: Matrix Double -> Matrix Double
315 crossProduct' m = run $ crossProduct n m'
316 where
317 n = dim m
318 m' = use m
319
320 runWith :: (Arrays c, Elt a1)
321 => (Dim -> Acc (Matrix a1) -> a2 -> Acc c)
322 -> Matrix a1
323 -> a2
324 -> c
325 runWith f m = run . f (dim m) (use m)
326
327 -- | cross
328 cross :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
329 cross n mat = zipWith (-) (matSumCol n mat) (mat)
330
331 cross' :: Matrix Double -> Matrix Double
332 cross' mat = run $ cross n mat'
333 where
334 mat' = use mat
335 n = dim mat
336
337
338 -----------------------------------------------------------------------
339 -----------------------------------------------------------------------
340 -- * Specificity and Genericity
341
342 {- | Metric Specificity and genericity: select terms
343
344 - let N termes and occurrences of i \[N{i}\]
345
346 - Cooccurrences of i and j \[N{ij}\]
347 - Probability to get i given j : \[P(i|j)=N{ij}/N{j}\]
348
349 - Genericity of i \[Gen(i) = \frac{\sum_{j \neq i,j} P(i|j)}{N-1}\]
350 - Specificity of j \[Spec(i) = \frac{\sum_{j \neq i,j} P(j|i)}{N-1}\]
351
352 - \[Inclusion (i) = Gen(i) + Spec(i)\)
353 - \[GenericityScore = Gen(i)- Spec(i)\]
354
355 - References: Science mapping with asymmetrical paradigmatic proximity
356 Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
357 on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
358 arXiv:0803.2315 [cs.OH]
359 -}
360 type InclusionExclusion = Double
361 type SpecificityGenericity = Double
362
363 data SquareMatrix = SymetricMatrix | NonSymetricMatrix
364 type SymetricMatrix = Matrix
365 type NonSymetricMatrix = Matrix
366
367
368 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
369 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
370 where
371 run' fun mat = run $ fun $ map fromIntegral $ use mat
372
373 -- | Inclusion (i) = Gen(i)+Spec(i)
374 inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
375 inclusionExclusion mat = zipWith (+) (pV mat) (pV mat)
376
377 -- | Genericity score = Gen(i)- Spec(i)
378 specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
379 specificityGenericity mat = zipWith (+) (pH mat) (pH mat)
380
381 -- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
382 pV :: Acc (Matrix Double) -> Acc (Vector Double)
383 pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
384
385 -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
386 pH :: Acc (Matrix Double) -> Acc (Vector Double)
387 pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
388
389 cardN :: Exp Double
390 cardN = constant (P.fromIntegral (dim m) :: Double)
391
392
393 -- | P(i|j) = Nij /N(jj) Probability to get i given j
394 --p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
395 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
396 p_ij m = zipWith (/) m (n_jj m)
397 where
398 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
399 n_jj myMat' = backpermute (shape m)
400 (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
401 -> (Z :. j :. j)
402 )
403 ) myMat'
404
405 -- | P(j|i) = Nij /N(ii) Probability to get i given j
406 -- to test
407 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
408 p_ji = transpose . p_ij
409
410
411 -- | Step to ckeck the result in visual/qualitative tests
412 incExcSpeGen_proba :: Matrix Int -> Matrix Double
413 incExcSpeGen_proba m = run' pro m
414 where
415 run' fun mat = run $ fun $ map fromIntegral $ use mat
416
417 pro mat = p_ji mat
418
419 {-
420 -- | Hypothesis to test maybe later (or not)
421 -- TODO ask accelerate for instances to ease such writtings:
422 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
423 p_ m = zipWith (/) m (n_ m)
424 where
425 n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
426 n_ m = backpermute (shape m)
427 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
428 -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
429 )
430 ) m
431 -}
432
433 -- * For Tests (to be removed)
434 -- | Test perfermance with this matrix
435 -- TODO : add this in a benchmark folder
436 distriTest :: Matrix Double
437 distriTest = distributional $ matrix 100 [1..]
438 -----------------------------------------------------------------------
439