]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
Merge remote-tracking branch 'origin/pipeline' into pipeline
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35
36 module Gargantext.Viz.Graph.Distances.Matrice
37 where
38
39 import Data.Array.Accelerate
40 import Data.Array.Accelerate.Interpreter (run)
41 import Data.Array.Accelerate.Smart
42 import Data.Array.Accelerate.Type
43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
44
45 import Data.Maybe (Maybe(Just))
46 import qualified Gargantext.Prelude as P
47 import qualified Data.Array.Accelerate.Array.Representation as Repr
48
49 import Gargantext.Text.Metrics.Count
50
51
52 -----------------------------------------------------------------------
53 -- Test perf.
54 distriTest = distributional $ myMat 100
55 -----------------------------------------------------------------------
56
57 vector :: Int -> (Array (Z :. Int) Int)
58 vector n = fromList (Z :. n) [0..n]
59
60 matrix :: Elt c => Int -> [c] -> Matrix c
61 matrix n l = fromList (Z :. n :. n) l
62
63 myMat :: Int -> Matrix Int
64 myMat n = matrix n [1..]
65
66 -- | Two ways to get the rank (as documentation)
67 rank :: (Matrix a) -> Int
68 rank m = arrayRank $ arrayShape m
69
70 rank' :: (Matrix a) -> Int
71 rank' m = n
72 where
73 Z :. _ :. n = arrayShape m
74
75 -----------------------------------------------------------------------
76 -- | Conditional Distance
77
78 type Rank = Int
79
80 proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
81 proba r mat = zipWith (/) mat (mkSum r mat)
82
83 mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
84 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All))
85 $ fold (+) 0 mat
86
87
88 type Matrix' a = Acc (Matrix a)
89 type InclusionExclusion = Double
90 type SpecificityGenericity = Double
91
92
93 miniMax :: Matrix' Double -> Matrix' Double
94 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
95 where
96 miniMax' = (the $ minimum $ maximum m)
97
98 -- | Conditional distance (basic version)
99 conditional :: Matrix Int -> Matrix Double
100 conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
101 where
102 r :: Rank
103 r = rank' m
104
105
106 -- | Conditional distance (advanced version)
107 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
108 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
109 where
110
111 ie :: Matrix' Double -> Matrix' Double
112 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
113 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
114 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
115
116 n :: Exp Double
117 n = P.fromIntegral r
118
119 r :: Rank
120 r = rank' m
121
122 xs :: Matrix' Double -> Matrix' Double
123 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
124 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
125 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
126
127 -----------------------------------------------------------------------
128
129 -- | Distributional Distance
130 distributional :: Matrix Int -> Matrix Double
131 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
132 where
133 n = rank' m
134
135 filter m = zipWith (\a b -> max a b) m (transpose m)
136
137 ri mat = zipWith (/) mat1 mat2
138 where
139 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
140 mat2 = mkSum n mat
141
142 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
143 $ zipWith (/) (crossProduct m') (total m')
144
145 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
146
147 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
148 cross mat = zipWith (-) (mkSum n mat) (mat)
149
150
151 int2double :: Matrix Int -> Matrix Double
152 int2double m = run (map fromIntegral $ use m)
153
154 {-
155 Metric Specificity and genericty: select terms
156 Compute genericity/specificity:
157 P(j|i) = N(ij) / N(ii)
158 P(i|j) = N(ij) / N(jj)
159
160 Gen(i) = Mean{j} P(j_k|i)
161 Spec(i) = Mean{j} P(i|j_k)
162
163 Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
164 Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
165
166 -}
167
168 incExcSpeGen :: Matrix Int -> (Vector Double, Vector Double)
169 incExcSpeGen m = (run' ie m, run' sg m)
170 where
171 run' fun mat = run $ fun $ map fromIntegral $ use mat
172
173 pV :: Matrix' Double -> Acc (Vector Double)
174 pV mat = sum $ proba (rank' m) mat
175
176 pH :: Matrix' Double -> Acc (Vector Double)
177 pH mat = sum $ transpose $ proba (rank' m) mat
178
179 ie :: Matrix' Double -> Acc (Vector Double)
180 ie mat = zipWith (-) (pV mat) (pH mat)
181
182 sg :: Matrix' Double -> Acc (Vector Double)
183 sg mat = zipWith (+) (pV mat) (pH mat)
184
185