]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
Merge branch 'pipeline'
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35 {-# LANGUAGE ScopedTypeVariables #-}
36
37
38 module Gargantext.Viz.Graph.Distances.Matrice
39 where
40
41 import Data.Array.Accelerate
42 import Data.Array.Accelerate.Interpreter (run)
43 import Data.Array.Accelerate.Smart
44 import Data.Array.Accelerate.Type
45 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
46
47 import Data.Maybe (Maybe(Just))
48 import qualified Gargantext.Prelude as P
49 import qualified Data.Array.Accelerate.Array.Representation as Repr
50
51 import Gargantext.Text.Metrics.Count
52
53
54 -----------------------------------------------------------------------
55 -- Test perf.
56 distriTest = distributional $ myMat 100
57 -----------------------------------------------------------------------
58
59 vector :: Int -> (Array (Z :. Int) Int)
60 vector n = fromList (Z :. n) [0..n]
61
62 matrix :: Elt c => Int -> [c] -> Matrix c
63 matrix n l = fromList (Z :. n :. n) l
64
65 myMat :: Int -> Matrix Int
66 myMat n = matrix n [1..]
67
68 -- | Two ways to get the rank (as documentation)
69 rank :: (Matrix a) -> Int
70 rank m = arrayRank $ arrayShape m
71
72 -----------------------------------------------------------------------
73 -- | Dimension of a square Matrix
74 -- How to force use with SquareMatrix ?
75 type Dim = Int
76
77 dim :: (Matrix a) -> Dim
78 dim m = n
79 where
80 Z :. _ :. n = arrayShape m
81 -- == indexTail (arrayShape m)
82
83 -----------------------------------------------------------------------
84 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
85 proba r mat = zipWith (/) mat (mkSum r mat)
86
87 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
88 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
89
90 -- divByDiag
91 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
92 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
93 where
94 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
95 diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
96 -----------------------------------------------------------------------
97
98 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
99 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
100 where
101 miniMax' = (the $ minimum $ maximum m)
102
103 -- | Conditional distance (basic version)
104 conditional :: Matrix Int -> Matrix Double
105 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
106
107
108 -- | Conditional distance (advanced version)
109 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
110 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
111 where
112
113 ie :: Acc (Matrix Double) -> Acc (Matrix Double)
114 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
115 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
116 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
117
118 n :: Exp Double
119 n = P.fromIntegral r
120
121 r :: Dim
122 r = dim m
123
124 xs :: Acc (Matrix Double) -> Acc (Matrix Double)
125 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
126 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
127 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
128
129 -----------------------------------------------------------------------
130
131 -- | Distributional Distance
132 distributional :: Matrix Int -> Matrix Double
133 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
134 where
135 n = dim m
136
137 filter m = zipWith (\a b -> max a b) m (transpose m)
138
139 ri mat = zipWith (/) mat1 mat2
140 where
141 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
142 mat2 = mkSum n mat
143
144 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
145 $ zipWith (/) (crossProduct m') (total m')
146
147 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
148
149 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
150 cross mat = zipWith (-) (mkSum n mat) (mat)
151
152
153
154 -----------------------------------------------------------------------
155 -----------------------------------------------------------------------
156
157 {-
158 Metric Specificity and genericity: select terms
159
160 let N termes
161 Ni : occ de i
162 Nij : cooc i et j
163 Probability to get i given j : P(i|j)=Nij/Nj
164 Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
165 Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
166 Inclusion (i) = Gen(i)+Spec(i)
167 Genericity score = Gen(i)- Spec(i)
168
169
170 References:
171 * Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
172
173 -}
174
175 type InclusionExclusion = Double
176 type SpecificityGenericity = Double
177
178 data SquareMatrix = SymetricMatrix | NonSymetricMatrix
179 type SymetricMatrix = Matrix
180 type NonSymetricMatrix = Matrix
181
182
183 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
184 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
185 where
186 run' fun mat = run $ fun $ map fromIntegral $ use mat
187
188 -- | Inclusion (i) = Gen(i)+Spec(i)
189 inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
190 inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
191 --
192 -- | Genericity score = Gen(i)- Spec(i)
193 specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
194 specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
195
196 -- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
197 pV :: Acc (Matrix Double) -> Acc (Vector Double)
198 pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
199
200 -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
201 pH :: Acc (Matrix Double) -> Acc (Vector Double)
202 pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
203
204 cardN :: Exp Double
205 cardN = constant (P.fromIntegral (dim m) :: Double)
206
207
208 -- | P(i|j) = Nij /N(jj) Probability to get i given j
209 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
210 p_ij m = zipWith (/) m (n_jj m)
211 where
212 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
213 n_jj m = backpermute (shape m)
214 (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
215 -> (Z :. j :. j)
216 )
217 ) m
218
219 -- | P(j|i) = Nij /N(ii) Probability to get i given j
220 -- to test
221 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
222 p_ji = transpose . p_ij
223
224
225 -- | Step to ckeck the result in visual/qualitative tests
226 incExcSpeGen_proba :: Matrix Int -> Matrix Double
227 incExcSpeGen_proba m = run' pro m
228 where
229 run' fun mat = run $ fun $ map fromIntegral $ use mat
230
231 pro mat = p_ji mat
232
233 {-
234 -- | Hypothesis to test maybe later (or not)
235 -- TODO ask accelerate for instances to ease such writtings:
236 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
237 p_ m = zipWith (/) m (n_ m)
238 where
239 n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
240 n_ m = backpermute (shape m)
241 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
242 -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
243 )
244 ) m
245 -}
246
247