]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
need fix
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35
36 module Gargantext.Viz.Graph.Distances.Matrice
37 where
38
39 import Data.Array.Accelerate
40 import Data.Array.Accelerate.Interpreter (run)
41 import Data.Array.Accelerate.Smart
42 import Data.Array.Accelerate.Type
43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
44
45 import Data.Maybe (Maybe(Just))
46 import qualified Gargantext.Prelude as P
47 import qualified Data.Array.Accelerate.Array.Representation as Repr
48
49 import Gargantext.Text.Metrics.Count
50
51
52 -----------------------------------------------------------------------
53 -- Test perf.
54 distriTest = distributional $ myMat 100
55 -----------------------------------------------------------------------
56
57 vector :: Int -> (Array (Z :. Int) Int)
58 vector n = fromList (Z :. n) [0..n]
59
60 matrix :: Elt c => Int -> [c] -> Matrix c
61 matrix n l = fromList (Z :. n :. n) l
62
63 myMat :: Int -> Matrix Int
64 myMat n = matrix n [1..]
65
66 -- | Two ways to get the rank (as documentation)
67 rank :: (Matrix a) -> Int
68 rank m = arrayRank $ arrayShape m
69
70 -----------------------------------------------------------------------
71 -- | Dimension of a square Matrix
72 -- How to force use with SquareMatrix ?
73 type Dim = Int
74
75 dim :: (Matrix a) -> Dim
76 dim m = n
77 where
78 Z :. _ :. n = arrayShape m
79 -- == indexTail (arrayShape m)
80
81 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
82 proba r mat = zipWith (/) mat (mkSum r mat)
83
84 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
85 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
86
87 -- divByDiag
88 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
89 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
90 where
91 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
92 diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
93
94 -- | Conditional Distance
95
96 {-
97 Metric Specificity and genericity: select terms
98
99 N termes
100
101 Ni : occ de i
102
103 Nij : cooc i et j
104
105 P(i|j)=Nij/Nj Probability to get i given j
106
107 Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
108
109 Spec(i) : 1/(N-1)*Sum( j!=i, P(j|i)) : Specificity of j
110
111 Inclusion (i) = Gen(i)+Spec(i)
112
113 Genericity score = Gen(i)- Spec(i)
114
115
116 ----
117
118 Compute genericity/specificity:
119 P(j|i) = N(ij) / N(ii)
120 P(i|j) = N(ij) / N(jj)
121
122 Gen(i) = sum P(i|j) | j /= i) / (N-1)
123 Spec(i) = sum P(j|i) | i /= j) / (N-1)
124
125 Genericity(i) = (Gen(i) - Spe(i)) / 2
126 Inclusion(i) = (Spec(i) + Gen(i)) / 2
127
128 -}
129
130 -- M - M-1 = 0
131 data SquareMatrix = SymetricMatrix | NonSymetricMatrix
132
133 type SymetricMatrix = Matrix
134 type NonSymetricMatrix = Matrix
135
136 -- | Compute genericity/specificity:
137 ---- | P(i|j) = N(ij) / N(jj)
138 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
139 p_ij m = zipWith (/) m (n_jj m)
140 where
141 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
142 n_jj m = backpermute (shape m)
143 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
144 -> ifThenElse (i < j) (Z :. j :. j) (Z :. i :. i)
145 )
146 ) m
147
148 -- P(j|i) = N(ij) / N(ii)
149 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
150 p_ji m = zipWith (/) m (n_ii m)
151 where
152 n_ii :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
153 n_ii m = backpermute (shape m)
154 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
155 -> (Z :. i :. i)
156 )
157 ) m
158
159
160 type Matrix' a = Acc (Matrix a)
161 type InclusionExclusion = Double
162 type SpecificityGenericity = Double
163
164
165 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
166 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
167 where
168 miniMax' = (the $ minimum $ maximum m)
169
170 -- | Conditional distance (basic version)
171 conditional :: Matrix Int -> Matrix Double
172 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
173
174
175 -- | Conditional distance (advanced version)
176 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
177 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
178 where
179
180 ie :: Matrix' Double -> Matrix' Double
181 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
182 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
183 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
184
185 n :: Exp Double
186 n = P.fromIntegral r
187
188 r :: Dim
189 r = dim m
190
191 xs :: Matrix' Double -> Matrix' Double
192 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
193 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
194 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
195
196 -----------------------------------------------------------------------
197
198 -- | Distributional Distance
199 distributional :: Matrix Int -> Matrix Double
200 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
201 where
202 n = dim m
203
204 filter m = zipWith (\a b -> max a b) m (transpose m)
205
206 ri mat = zipWith (/) mat1 mat2
207 where
208 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
209 mat2 = mkSum n mat
210
211 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
212 $ zipWith (/) (crossProduct m') (total m')
213
214 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
215
216 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
217 cross mat = zipWith (-) (mkSum n mat) (mat)
218
219
220 int2double :: Matrix Int -> Matrix Double
221 int2double m = run (map fromIntegral $ use m)
222
223 incExcSpeGen' :: Matrix Int -> (Vector Double, Vector Double)
224 incExcSpeGen' m = (run' ie m, run' sg m)
225 where
226 run' fun mat = run $ fun $ map fromIntegral $ use mat
227
228 ie :: Acc (Matrix Double) -> Acc (Vector Double)
229 ie mat = zipWith (-) (pV mat) (pH mat)
230 --
231 sg :: Acc (Matrix Double) -> Acc (Vector Double)
232 sg mat = zipWith (+) (pV mat) (pH mat)
233
234 n :: Exp Double
235 n = constant (P.fromIntegral (dim m) :: Double)
236
237 pV :: Acc (Matrix Double) -> Acc (Vector Double)
238 pV mat = map (\x -> (x-1)/(n-1)) $ sum $ p_ij mat
239
240 pH :: Acc (Matrix Double) -> Acc (Vector Double)
241 pH mat = map (\x -> (x-1)/(n-1)) $ sum $ p_ji mat
242
243
244 incExcSpeGen_proba :: Matrix Int -> Matrix Double
245 incExcSpeGen_proba m = run' pro m
246 where
247 run' fun mat = run $ fun $ map fromIntegral $ use mat
248
249 pro mat = p_ij mat