]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[FIX scores]
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35 {-# LANGUAGE ScopedTypeVariables #-}
36
37
38 module Gargantext.Viz.Graph.Distances.Matrice
39 where
40
41 import Data.Array.Accelerate
42 import Data.Array.Accelerate.Interpreter (run)
43 import Data.Array.Accelerate.Smart
44 import Data.Array.Accelerate.Type
45 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
46
47 import Data.Maybe (Maybe(Just))
48 import qualified Gargantext.Prelude as P
49 import qualified Data.Array.Accelerate.Array.Representation as Repr
50
51 import Gargantext.Text.Metrics.Count
52
53
54 -----------------------------------------------------------------------
55 -- Test perf.
56 distriTest = distributional $ myMat 100
57 -----------------------------------------------------------------------
58
59 vector :: Int -> (Array (Z :. Int) Int)
60 vector n = fromList (Z :. n) [0..n]
61
62 matrix :: Elt c => Int -> [c] -> Matrix c
63 matrix n l = fromList (Z :. n :. n) l
64
65 myMat :: Int -> Matrix Int
66 myMat n = matrix n [1..]
67
68 -- | Two ways to get the rank (as documentation)
69 rank :: (Matrix a) -> Int
70 rank m = arrayRank $ arrayShape m
71
72 -----------------------------------------------------------------------
73 -- | Dimension of a square Matrix
74 -- How to force use with SquareMatrix ?
75 type Dim = Int
76
77 dim :: (Matrix a) -> Dim
78 dim m = n
79 where
80 Z :. _ :. n = arrayShape m
81 -- == indexTail (arrayShape m)
82
83 -----------------------------------------------------------------------
84 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
85 proba r mat = zipWith (/) mat (mkSum r mat)
86
87 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
88 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
89
90 -- divByDiag
91 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
92 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
93 where
94 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
95 diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
96 -----------------------------------------------------------------------
97
98 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
99 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
100 where
101 miniMax' = (the $ minimum $ maximum m)
102
103 -- | Conditional distance (basic version)
104 conditional :: Matrix Int -> Matrix Double
105 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
106
107
108 -- | Conditional distance (advanced version)
109 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
110 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
111 where
112
113 ie :: Acc (Matrix Double) -> Acc (Matrix Double)
114 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
115 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
116 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
117
118 n :: Exp Double
119 n = P.fromIntegral r
120
121 r :: Dim
122 r = dim m
123
124 xs :: Acc (Matrix Double) -> Acc (Matrix Double)
125 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
126 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
127 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
128
129 -----------------------------------------------------------------------
130
131 -- | Distributional Distance
132 distributional :: Matrix Int -> Matrix Double
133 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
134 where
135 n = dim m
136
137 filter m = zipWith (\a b -> max a b) m (transpose m)
138
139 ri mat = zipWith (/) mat1 mat2
140 where
141 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
142 mat2 = mkSum n mat
143
144 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
145 $ zipWith (/) (crossProduct m') (total m')
146
147 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
148
149 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
150 cross mat = zipWith (-) (mkSum n mat) (mat)
151
152
153
154 -----------------------------------------------------------------------
155 -----------------------------------------------------------------------
156 -- | Conditional Distance
157
158 {-
159 Metric Specificity and genericity: select terms
160
161 N termes
162
163 Ni : occ de i
164
165 Nij : cooc i et j
166
167 P(i|j)=Nij/Nj Probability to get i given j
168
169 Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
170
171 Spec(i) : 1/(N-1)*Sum( j!=i, P(j|i)) : Specificity of j
172
173 Inclusion (i) = Gen(i)+Spec(i)
174
175 Genericity score = Gen(i)- Spec(i)
176
177
178 References:
179 * Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
180
181 -}
182
183 type InclusionExclusion = Double
184 type SpecificityGenericity = Double
185
186 data SquareMatrix = SymetricMatrix | NonSymetricMatrix
187 type SymetricMatrix = Matrix
188 type NonSymetricMatrix = Matrix
189
190
191 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
192 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
193 where
194 run' fun mat = run $ fun $ map fromIntegral $ use mat
195
196 inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
197 inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
198 --
199 specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
200 specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
201
202 -- TODO find a better term
203 pV :: Acc (Matrix Double) -> Acc (Vector Double)
204 pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
205
206 -- TODO find a better term
207 pH :: Acc (Matrix Double) -> Acc (Vector Double)
208 pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
209
210 cardN :: Exp Double
211 cardN = constant (P.fromIntegral (dim m) :: Double)
212
213
214
215
216 ---- | P(i|j) = N(ij) / N(jj)
217 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
218 p_ij m = zipWith (/) m (n_jj m)
219 where
220 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
221 n_jj m = backpermute (shape m)
222 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
223 -> (Z :. j :. j)
224 )
225 ) m
226
227 -- | P(j|i) = N(ij) / N(ii)
228 -- to test
229 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
230 p_ji = transpose . p_ij
231
232 -- | step to ckeck the result
233 incExcSpeGen_proba :: Matrix Int -> Matrix Double
234 incExcSpeGen_proba m = run' pro m
235 where
236 run' fun mat = run $ fun $ map fromIntegral $ use mat
237
238 pro mat = p_ji mat
239
240
241 {-
242 -- | Hypothesis to test maybe later (or not)
243 -- TODO ask accelerate for instances to ease such writtings:
244 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
245 p_ m = zipWith (/) m (n_ m)
246 where
247 n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
248 n_ m = backpermute (shape m)
249 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
250 -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
251 )
252 ) m
253 -}
254
255