]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[WORKFLOW] clean, issue in map2mat: diagonal == 0.
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35 {-# LANGUAGE ScopedTypeVariables #-}
36
37
38 module Gargantext.Viz.Graph.Distances.Matrice
39 where
40
41 import Data.Array.Accelerate
42 import Data.Array.Accelerate.Interpreter (run)
43 import Data.Array.Accelerate.Smart
44 import Data.Array.Accelerate.Type
45 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
46
47 import Data.Maybe (Maybe(Just))
48 import qualified Gargantext.Prelude as P
49 import qualified Data.Array.Accelerate.Array.Representation as Repr
50
51 import Gargantext.Text.Metrics.Count
52
53
54 -----------------------------------------------------------------------
55 -- Test perf.
56 distriTest = distributional $ myMat 100
57 -----------------------------------------------------------------------
58
59 vector :: Int -> (Array (Z :. Int) Int)
60 vector n = fromList (Z :. n) [0..n]
61
62 matrix :: Elt c => Int -> [c] -> Matrix c
63 matrix n l = fromList (Z :. n :. n) l
64
65 myMat :: Int -> Matrix Int
66 myMat n = matrix n [1..]
67
68 -- | Two ways to get the rank (as documentation)
69 rank :: (Matrix a) -> Int
70 rank m = arrayRank $ arrayShape m
71
72 -----------------------------------------------------------------------
73 -- | Dimension of a square Matrix
74 -- How to force use with SquareMatrix ?
75 type Dim = Int
76
77 dim :: (Matrix a) -> Dim
78 dim m = n
79 where
80 Z :. _ :. n = arrayShape m
81 -- == indexTail (arrayShape m)
82
83 -----------------------------------------------------------------------
84 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
85 proba r mat = zipWith (/) mat (mkSum r mat)
86
87 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
88 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
89
90 -- divByDiag
91 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
92 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
93 where
94 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
95 diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
96 -----------------------------------------------------------------------
97
98 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
99 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
100 where
101 miniMax' = (the $ minimum $ maximum m)
102
103 -- | Conditional distance (basic version)
104 conditional :: Matrix Int -> Matrix Double
105 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
106
107
108 -- | Conditional distance (advanced version)
109 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
110 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
111 where
112 ie :: Acc (Matrix Double) -> Acc (Matrix Double)
113 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
114 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
115 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
116
117 n :: Exp Double
118 n = P.fromIntegral r
119
120 r :: Dim
121 r = dim m
122
123 xs :: Acc (Matrix Double) -> Acc (Matrix Double)
124 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
125 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
126 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
127
128 -----------------------------------------------------------------------
129
130 -- | Distributional Distance
131 distributional :: Matrix Int -> Matrix Double
132 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
133 where
134 n = dim m
135
136 filter m = zipWith (\a b -> max a b) m (transpose m)
137
138 ri mat = zipWith (/) mat1 mat2
139 where
140 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
141 mat2 = mkSum n mat
142
143 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
144 $ zipWith (/) (crossProduct m') (total m')
145
146 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
147
148 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
149 cross mat = zipWith (-) (mkSum n mat) (mat)
150
151 -----------------------------------------------------------------------
152 -----------------------------------------------------------------------
153
154 {-
155 Metric Specificity and genericity: select terms
156
157 let N termes
158 Ni : occ de i
159 Nij : cooc i et j
160 Probability to get i given j : P(i|j)=Nij/Nj
161 Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
162 Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
163 Inclusion (i) = Gen(i)+Spec(i)
164 Genericity score = Gen(i)- Spec(i)
165
166
167 References:
168 * Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
169
170 -}
171
172 type InclusionExclusion = Double
173 type SpecificityGenericity = Double
174
175 data SquareMatrix = SymetricMatrix | NonSymetricMatrix
176 type SymetricMatrix = Matrix
177 type NonSymetricMatrix = Matrix
178
179
180 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
181 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
182 where
183 run' fun mat = run $ fun $ map fromIntegral $ use mat
184
185 -- | Inclusion (i) = Gen(i)+Spec(i)
186 inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
187 inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
188 --
189 -- | Genericity score = Gen(i)- Spec(i)
190 specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
191 specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
192
193 -- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
194 pV :: Acc (Matrix Double) -> Acc (Vector Double)
195 pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
196
197 -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
198 pH :: Acc (Matrix Double) -> Acc (Vector Double)
199 pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
200
201 cardN :: Exp Double
202 cardN = constant (P.fromIntegral (dim m) :: Double)
203
204
205 -- | P(i|j) = Nij /N(jj) Probability to get i given j
206 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
207 p_ij m = zipWith (/) m (n_jj m)
208 where
209 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
210 n_jj m = backpermute (shape m)
211 (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
212 -> (Z :. j :. j)
213 )
214 ) m
215
216 -- | P(j|i) = Nij /N(ii) Probability to get i given j
217 -- to test
218 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
219 p_ji = transpose . p_ij
220
221
222 -- | Step to ckeck the result in visual/qualitative tests
223 incExcSpeGen_proba :: Matrix Int -> Matrix Double
224 incExcSpeGen_proba m = run' pro m
225 where
226 run' fun mat = run $ fun $ map fromIntegral $ use mat
227
228 pro mat = p_ji mat
229
230 {-
231 -- | Hypothesis to test maybe later (or not)
232 -- TODO ask accelerate for instances to ease such writtings:
233 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
234 p_ m = zipWith (/) m (n_ m)
235 where
236 n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
237 n_ m = backpermute (shape m)
238 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
239 -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
240 )
241 ) m
242 -}
243
244