]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[FEAT] gen/spe statistics.
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35
36 module Gargantext.Viz.Graph.Distances.Matrice
37 where
38
39 import Data.Array.Accelerate
40 import Data.Array.Accelerate.Interpreter (run)
41 import Data.Array.Accelerate.Smart
42 import Data.Array.Accelerate.Type
43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
44
45 import Data.Maybe (Maybe(Just))
46 import qualified Gargantext.Prelude as P
47 import qualified Data.Array.Accelerate.Array.Representation as Repr
48
49 import Gargantext.Text.Metrics.Count
50
51
52 -----------------------------------------------------------------------
53 -- Test perf.
54 distriTest = distributional $ myMat 100
55 -----------------------------------------------------------------------
56
57 vector :: Int -> (Array (Z :. Int) Int)
58 vector n = fromList (Z :. n) [0..n]
59
60 matrix :: Elt c => Int -> [c] -> Matrix c
61 matrix n l = fromList (Z :. n :. n) l
62
63 myMat :: Int -> Matrix Int
64 myMat n = matrix n [1..]
65
66 -- | Two ways to get the rank (as documentation)
67 rank :: (Matrix a) -> Int
68 rank m = arrayRank $ arrayShape m
69
70 -----------------------------------------------------------------------
71 -- | Conditional Distance
72
73 -- | Dimension of a square Matrix
74 -- How to force use with SquareMatrix ?
75 type Dim = Int
76
77 dim :: (Matrix a) -> Dim
78 dim m = n
79 where
80 Z :. _ :. n = arrayShape m
81 -- == indexTail (arrayShape m)
82
83 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
84 proba r mat = zipWith (/) mat (mkSum r mat)
85
86 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
87 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
88
89 -- divByDiag
90 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
91 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
92
93 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
94 diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
95
96 {-
97 Metric Specificity and genericity: select terms
98 Compute genericity/specificity:
99 P(j|i) = N(ij) / N(ii)
100 P(i|j) = N(ij) / N(jj)
101
102 Gen(i) = sum P(i|j) | j /= i) / (N-1)
103 Spec(i) = sum P(j|i) | i /= j) / (N-1)
104
105 Genericity(i) = (Gen(i) - Spe(i)) / 2
106 Inclusion(i) = (Spec(i) + Gen(i)) / 2
107
108 -}
109
110 -- M - M-1 = 0
111 data SquareMatrix = SymetricMatrix | NonSymetricMatrix
112
113 type SymetricMatrix = Matrix
114 type NonSymetricMatrix = Matrix
115
116 -- | Compute genericity/specificity:
117 ---- | P(i|j) = N(ij) / N(jj)
118 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
119 p_ij m = zipWith (/) m (n_jj m)
120 where
121 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
122 n_jj m = backpermute (shape m)
123 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
124 -> (Z :. j :. j)
125 )
126 ) m
127
128 -- P(j|i) = N(ij) / N(ii)
129 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
130 p_ji m = zipWith (/) m (n_ii m)
131 where
132 n_ii :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
133 n_ii m = backpermute (shape m)
134 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
135 -> (Z :. i :. i)
136 )
137 ) m
138
139
140 type Matrix' a = Acc (Matrix a)
141 type InclusionExclusion = Double
142 type SpecificityGenericity = Double
143
144
145 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
146 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
147 where
148 miniMax' = (the $ minimum $ maximum m)
149
150 -- | Conditional distance (basic version)
151 conditional :: Matrix Int -> Matrix Double
152 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
153
154
155 -- | Conditional distance (advanced version)
156 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
157 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
158 where
159
160 ie :: Matrix' Double -> Matrix' Double
161 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
162 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
163 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
164
165 n :: Exp Double
166 n = P.fromIntegral r
167
168 r :: Dim
169 r = dim m
170
171 xs :: Matrix' Double -> Matrix' Double
172 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
173 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
174 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
175
176 -----------------------------------------------------------------------
177
178 -- | Distributional Distance
179 distributional :: Matrix Int -> Matrix Double
180 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
181 where
182 n = dim m
183
184 filter m = zipWith (\a b -> max a b) m (transpose m)
185
186 ri mat = zipWith (/) mat1 mat2
187 where
188 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
189 mat2 = mkSum n mat
190
191 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
192 $ zipWith (/) (crossProduct m') (total m')
193
194 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
195
196 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
197 cross mat = zipWith (-) (mkSum n mat) (mat)
198
199
200 int2double :: Matrix Int -> Matrix Double
201 int2double m = run (map fromIntegral $ use m)
202
203 incExcSpeGen' :: Matrix Int -> (Vector Double, Vector Double)
204 incExcSpeGen' m = (run' ie m, run' sg m)
205 where
206 run' fun mat = run $ fun $ map fromIntegral $ use mat
207
208 ie :: Acc (Matrix Double) -> Acc (Vector Double)
209 ie mat = zipWith (-) (pV mat) (pH mat)
210 --
211 sg :: Acc (Matrix Double) -> Acc (Vector Double)
212 sg mat = zipWith (+) (pV mat) (pH mat)
213
214 n :: Exp Double
215 n = constant (P.fromIntegral (dim m) :: Double)
216
217 pV :: Acc (Matrix Double) -> Acc (Vector Double)
218 pV mat = map (\x -> (x-1)/(n-1)) $ sum $ p_ij mat
219
220 pH :: Acc (Matrix Double) -> Acc (Vector Double)
221 pH mat = map (\x -> (x-1)/(n-1)) $ sum $ p_ji mat
222
223
224 incExcSpeGen_proba :: Matrix Int -> Matrix Double
225 incExcSpeGen_proba m = run' pro m
226 where
227 run' fun mat = run $ fun $ map fromIntegral $ use mat
228
229 pro mat = p_ij mat