]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[Scores] Inclusion/Exclusion and Specificity/Genericity ok with my tests.
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35
36 module Gargantext.Viz.Graph.Distances.Matrice
37 where
38
39 import Data.Array.Accelerate
40 import Data.Array.Accelerate.Interpreter (run)
41 import Data.Array.Accelerate.Smart
42 import Data.Array.Accelerate.Type
43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
44
45 import Data.Maybe (Maybe(Just))
46 import qualified Gargantext.Prelude as P
47 import qualified Data.Array.Accelerate.Array.Representation as Repr
48
49 import Gargantext.Text.Metrics.Count
50
51
52 -----------------------------------------------------------------------
53 -- Test perf.
54 distriTest = distributional $ myMat 100
55 -----------------------------------------------------------------------
56
57 vector :: Int -> (Array (Z :. Int) Int)
58 vector n = fromList (Z :. n) [0..n]
59
60 matrix :: Elt c => Int -> [c] -> Matrix c
61 matrix n l = fromList (Z :. n :. n) l
62
63 myMat :: Int -> Matrix Int
64 myMat n = matrix n [1..]
65
66 -- | Two ways to get the rank (as documentation)
67 rank :: (Matrix a) -> Int
68 rank m = arrayRank $ arrayShape m
69
70 -----------------------------------------------------------------------
71 -- | Dimension of a square Matrix
72 -- How to force use with SquareMatrix ?
73 type Dim = Int
74
75 dim :: (Matrix a) -> Dim
76 dim m = n
77 where
78 Z :. _ :. n = arrayShape m
79 -- == indexTail (arrayShape m)
80
81 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
82 proba r mat = zipWith (/) mat (mkSum r mat)
83
84 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
85 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
86
87 -- divByDiag
88 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
89 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
90 where
91 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
92 diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
93
94 -- | Conditional Distance
95
96 {-
97 Metric Specificity and genericity: select terms
98
99 N termes
100
101 Ni : occ de i
102
103 Nij : cooc i et j
104
105 P(i|j)=Nij/Nj Probability to get i given j
106
107 Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
108
109 Spec(i) : 1/(N-1)*Sum( j!=i, P(j|i)) : Specificity of j
110
111 Inclusion (i) = Gen(i)+Spec(i)
112
113 Genericity score = Gen(i)- Spec(i)
114
115
116 ----
117
118 Compute genericity/specificity:
119 P(j|i) = N(ij) / N(ii)
120 P(i|j) = N(ij) / N(jj)
121
122 Gen(i) = sum P(i|j) | j /= i) / (N-1)
123 Spec(i) = sum P(j|i) | i /= j) / (N-1)
124
125 Genericity(i) = (Gen(i) - Spe(i)) / 2
126 Inclusion(i) = (Spec(i) + Gen(i)) / 2
127
128 -}
129
130 -- M - M-1 = 0
131 data SquareMatrix = SymetricMatrix | NonSymetricMatrix
132
133 type SymetricMatrix = Matrix
134 type NonSymetricMatrix = Matrix
135
136 -- | Compute genericity/specificity:
137 --p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
138 --p_ m = zipWith (/) m (n_jj m)
139 -- where
140 -- n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
141 -- n_jj m = backpermute (shape m)
142 -- (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
143 -- -> ifThenElse (i < j) (Z :. j :. j) (Z :. i :. i)
144 -- )
145 -- ) m
146
147
148
149
150 ---- | P(i|j) = N(ij) / N(jj)
151 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
152 p_ij m = zipWith (/) m (n_jj m)
153 where
154 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
155 n_jj m = backpermute (shape m)
156 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
157 -> (Z :. j :. j)
158 )
159 ) m
160
161 -- P(j|i) = N(ij) / N(ii)
162 -- to test
163 p_ji' :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
164 p_ji' = transpose . p_ij
165
166 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
167 p_ji m = zipWith (/) m (n_jj m)
168 where
169 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
170 n_jj m = backpermute (shape m)
171 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
172 -> (Z :. i :. i)
173 )
174 ) m
175
176
177
178 type Matrix' a = Acc (Matrix a)
179 type InclusionExclusion = Double
180 type SpecificityGenericity = Double
181
182
183 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
184 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
185 where
186 miniMax' = (the $ minimum $ maximum m)
187
188 -- | Conditional distance (basic version)
189 conditional :: Matrix Int -> Matrix Double
190 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
191
192
193 -- | Conditional distance (advanced version)
194 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
195 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
196 where
197
198 ie :: Matrix' Double -> Matrix' Double
199 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
200 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
201 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
202
203 n :: Exp Double
204 n = P.fromIntegral r
205
206 r :: Dim
207 r = dim m
208
209 xs :: Matrix' Double -> Matrix' Double
210 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
211 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
212 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
213
214 -----------------------------------------------------------------------
215
216 -- | Distributional Distance
217 distributional :: Matrix Int -> Matrix Double
218 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
219 where
220 n = dim m
221
222 filter m = zipWith (\a b -> max a b) m (transpose m)
223
224 ri mat = zipWith (/) mat1 mat2
225 where
226 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
227 mat2 = mkSum n mat
228
229 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
230 $ zipWith (/) (crossProduct m') (total m')
231
232 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
233
234 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
235 cross mat = zipWith (-) (mkSum n mat) (mat)
236
237
238 int2double :: Matrix Int -> Matrix Double
239 int2double m = run (map fromIntegral $ use m)
240
241 incExcSpeGen' :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
242 incExcSpeGen' m = (run' ie m, run' sg m)
243 where
244 run' fun mat = run $ fun $ map fromIntegral $ use mat
245
246 ie :: Acc (Matrix Double) -> Acc (Vector Double)
247 ie mat = zipWith (+) (pV mat) (pH mat)
248 --
249 sg :: Acc (Matrix Double) -> Acc (Vector Double)
250 sg mat = zipWith (-) (pV mat) (pH mat)
251
252 n :: Exp Double
253 n = constant (P.fromIntegral (dim m) :: Double)
254
255 pV :: Acc (Matrix Double) -> Acc (Vector Double)
256 pV mat = map (\x -> (x-1)/(n-1)) $ sum $ p_ij mat
257
258 pH :: Acc (Matrix Double) -> Acc (Vector Double)
259 pH mat = map (\x -> (x-1)/(n-1)) $ sum $ transpose $ p_ij mat
260
261
262 incExcSpeGen_proba :: Matrix Int -> Matrix Double
263 incExcSpeGen_proba m = run' pro m
264 where
265 run' fun mat = run $ fun $ map fromIntegral $ use mat
266
267 pro mat = p_ji mat