]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[WORKFLOW] cleaned, adding data2graph.
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35 {-# LANGUAGE ScopedTypeVariables #-}
36
37
38 module Gargantext.Viz.Graph.Distances.Matrice
39 where
40
41 import Data.Array.Accelerate
42 import Data.Array.Accelerate.Interpreter (run)
43 import Data.Array.Accelerate.Smart
44 import Data.Array.Accelerate.Type
45 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
46
47 import Data.Maybe (Maybe(Just))
48 import qualified Gargantext.Prelude as P
49 import qualified Data.Array.Accelerate.Array.Representation as Repr
50
51 import Gargantext.Text.Metrics.Count
52
53
54 -----------------------------------------------------------------------
55 -- Test perf.
56 distriTest = distributional $ myMat 100
57 -----------------------------------------------------------------------
58
59 vector :: Int -> (Array (Z :. Int) Int)
60 vector n = fromList (Z :. n) [0..n]
61
62 matrix :: Elt c => Int -> [c] -> Matrix c
63 matrix n l = fromList (Z :. n :. n) l
64
65 myMat :: Int -> Matrix Int
66 myMat n = matrix n [1..]
67
68 -- | Two ways to get the rank (as documentation)
69 rank :: (Matrix a) -> Int
70 rank m = arrayRank $ arrayShape m
71
72 -----------------------------------------------------------------------
73 -- | Dimension of a square Matrix
74 -- How to force use with SquareMatrix ?
75 type Dim = Int
76
77 dim :: (Matrix a) -> Dim
78 dim m = n
79 where
80 Z :. _ :. n = arrayShape m
81 -- == indexTail (arrayShape m)
82
83 -----------------------------------------------------------------------
84 proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
85 proba r mat = zipWith (/) mat (mkSum r mat)
86
87 mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
88 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
89
90 -- divByDiag
91 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
92 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
93 where
94 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
95 diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
96 -----------------------------------------------------------------------
97
98 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
99 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
100 where
101 miniMax' = (the $ minimum $ maximum m)
102
103 -- | Conditional distance (basic version)
104 conditional :: Matrix Int -> Matrix Double
105 conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
106
107
108 -- | Conditional distance (advanced version)
109 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
110 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
111 where
112 ie :: Acc (Matrix Double) -> Acc (Matrix Double)
113 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
114 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
115 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
116
117 n :: Exp Double
118 n = P.fromIntegral r
119
120 r :: Dim
121 r = dim m
122
123 xs :: Acc (Matrix Double) -> Acc (Matrix Double)
124 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
125 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
126 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
127
128 -----------------------------------------------------------------------
129
130 -- | Distributional Distance
131 distributional :: Matrix Int -> Matrix Double
132 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
133 where
134 n = dim m
135
136 filter m = zipWith (\a b -> max a b) m (transpose m)
137
138 ri mat = zipWith (/) mat1 mat2
139 where
140 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
141 mat2 = mkSum n mat
142
143 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
144 $ zipWith (/) (crossProduct m') (total m')
145
146 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
147
148 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
149 cross mat = zipWith (-) (mkSum n mat) (mat)
150
151
152
153 -----------------------------------------------------------------------
154 -----------------------------------------------------------------------
155
156 {-
157 Metric Specificity and genericity: select terms
158
159 let N termes
160 Ni : occ de i
161 Nij : cooc i et j
162 Probability to get i given j : P(i|j)=Nij/Nj
163 Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
164 Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
165 Inclusion (i) = Gen(i)+Spec(i)
166 Genericity score = Gen(i)- Spec(i)
167
168
169 References:
170 * Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
171
172 -}
173
174 type InclusionExclusion = Double
175 type SpecificityGenericity = Double
176
177 data SquareMatrix = SymetricMatrix | NonSymetricMatrix
178 type SymetricMatrix = Matrix
179 type NonSymetricMatrix = Matrix
180
181
182 incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
183 incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
184 where
185 run' fun mat = run $ fun $ map fromIntegral $ use mat
186
187 -- | Inclusion (i) = Gen(i)+Spec(i)
188 inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
189 inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
190 --
191 -- | Genericity score = Gen(i)- Spec(i)
192 specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
193 specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
194
195 -- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
196 pV :: Acc (Matrix Double) -> Acc (Vector Double)
197 pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
198
199 -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
200 pH :: Acc (Matrix Double) -> Acc (Vector Double)
201 pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
202
203 cardN :: Exp Double
204 cardN = constant (P.fromIntegral (dim m) :: Double)
205
206
207 -- | P(i|j) = Nij /N(jj) Probability to get i given j
208 p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
209 p_ij m = zipWith (/) m (n_jj m)
210 where
211 n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
212 n_jj m = backpermute (shape m)
213 (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
214 -> (Z :. j :. j)
215 )
216 ) m
217
218 -- | P(j|i) = Nij /N(ii) Probability to get i given j
219 -- to test
220 p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
221 p_ji = transpose . p_ij
222
223
224 -- | Step to ckeck the result in visual/qualitative tests
225 incExcSpeGen_proba :: Matrix Int -> Matrix Double
226 incExcSpeGen_proba m = run' pro m
227 where
228 run' fun mat = run $ fun $ map fromIntegral $ use mat
229
230 pro mat = p_ji mat
231
232 {-
233 -- | Hypothesis to test maybe later (or not)
234 -- TODO ask accelerate for instances to ease such writtings:
235 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
236 p_ m = zipWith (/) m (n_ m)
237 where
238 n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
239 n_ m = backpermute (shape m)
240 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
241 -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
242 )
243 ) m
244 -}
245
246