]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[SCORE] spegen quality tested, need to add test in comments.
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35
36 module Gargantext.Viz.Graph.Distances.Matrice
37 where
38
39 import Data.Array.Accelerate
40 import Data.Array.Accelerate.Interpreter (run)
41 import Data.Array.Accelerate.Smart
42 import Data.Array.Accelerate.Type
43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
44
45 import Data.Maybe (Maybe(Just))
46 import qualified Gargantext.Prelude as P
47 import qualified Data.Array.Accelerate.Array.Representation as Repr
48
49 import Gargantext.Text.Metrics.Count
50
51
52 -----------------------------------------------------------------------
53 -- Test perf.
54 distriTest = distributional $ myMat 100
55 -----------------------------------------------------------------------
56
57 vector :: Int -> (Array (Z :. Int) Int)
58 vector n = fromList (Z :. n) [0..n]
59
60 matrix :: Elt c => Int -> [c] -> Matrix c
61 matrix n l = fromList (Z :. n :. n) l
62
63 myMat :: Int -> Matrix Int
64 myMat n = matrix n [1..]
65
66 -- | Two ways to get the rank (as documentation)
67 rank :: (Matrix a) -> Int
68 rank m = arrayRank $ arrayShape m
69
70 rank' :: (Matrix a) -> Int
71 rank' m = n
72 where
73 Z :. _ :. n = arrayShape m
74
75 -----------------------------------------------------------------------
76 -- | Conditional Distance
77
78 type Rank = Int
79
80 proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
81 proba r mat = zipWith (/) mat (mkSum r mat)
82
83 mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
84 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
85
86 divByDiag :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
87 divByDiag r mat = zipWith (/) mat (replicate (constant (Z :. (r :: Int) :. All)) $ diag mat)
88
89 diag :: forall e. Elt e => Acc (Matrix e) -> Acc (Vector e)
90 diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) (m :: Acc (Array DIM2 e))
91
92
93 type Matrix' a = Acc (Matrix a)
94 type InclusionExclusion = Double
95 type SpecificityGenericity = Double
96
97
98 miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
99 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
100 where
101 miniMax' = (the $ minimum $ maximum m)
102
103 -- | Conditional distance (basic version)
104 conditional :: Matrix Int -> Matrix Double
105 conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
106 where
107 r :: Rank
108 r = rank' m
109
110
111 -- | Conditional distance (advanced version)
112 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
113 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
114 where
115
116 ie :: Matrix' Double -> Matrix' Double
117 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
118 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
119 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
120
121 n :: Exp Double
122 n = P.fromIntegral r
123
124 r :: Rank
125 r = rank' m
126
127 xs :: Matrix' Double -> Matrix' Double
128 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
129 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
130 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
131
132 -----------------------------------------------------------------------
133
134 -- | Distributional Distance
135 distributional :: Matrix Int -> Matrix Double
136 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
137 where
138 n = rank' m
139
140 filter m = zipWith (\a b -> max a b) m (transpose m)
141
142 ri mat = zipWith (/) mat1 mat2
143 where
144 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
145 mat2 = mkSum n mat
146
147 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
148 $ zipWith (/) (crossProduct m') (total m')
149
150 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
151
152 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
153 cross mat = zipWith (-) (mkSum n mat) (mat)
154
155
156 int2double :: Matrix Int -> Matrix Double
157 int2double m = run (map fromIntegral $ use m)
158
159 {-
160 Metric Specificity and genericity: select terms
161 Compute genericity/specificity:
162 P(j|i) = N(ij) / N(ii)
163 P(i|j) = N(ij) / N(jj)
164
165 Gen(i) = Mean{j} P(j_k|i)
166 Spec(i) = Mean{j} P(i|j_k)
167
168 Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
169 Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
170
171 -}
172
173
174 incExcSpeGen' :: Matrix Int -> (Vector Double, Vector Double)
175 incExcSpeGen' m = (run' ie m, run' sg m)
176 where
177 run' fun mat = run $ fun $ map fromIntegral $ use mat
178
179 ie :: Acc (Matrix Double) -> Acc (Vector Double)
180 ie mat = zipWith (-) (pV mat) (pH mat)
181 --
182 sg :: Acc (Matrix Double) -> Acc (Vector Double)
183 sg mat = zipWith (+) (pV mat) (pH mat)
184
185 n :: Exp Double
186 n = constant (P.fromIntegral (rank' m - 1) :: Double)
187
188 pV :: Acc (Matrix Double) -> Acc (Vector Double)
189 pV mat = map (\x -> (x-1)/n) $ sum $ divByDiag (rank' m) mat
190
191 pH :: Acc (Matrix Double) -> Acc (Vector Double)
192 pH mat = map (\x -> (x-1)/n) $ sum $ transpose $ divByDiag (rank' m) mat
193
194
195
196 incExcSpeGen_proba :: Matrix Int -> Matrix Double
197 incExcSpeGen_proba m = run' pro m
198 where
199 run' fun mat = run $ fun $ map fromIntegral $ use mat
200
201 pro mat = divByDiag (rank' m) mat