]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[COUNT] renaming file and generic function.
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35
36 module Gargantext.Viz.Graph.Distances.Matrice
37 where
38
39 import Data.Array.Accelerate
40 import Data.Array.Accelerate.Interpreter (run)
41 import Data.Array.Accelerate.Smart
42 import Data.Array.Accelerate.Type
43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
44
45 import Data.Maybe (Maybe(Just))
46 import qualified Gargantext.Prelude as P
47 import qualified Data.Array.Accelerate.Array.Representation as Repr
48
49 import Gargantext.Text.Metrics.Count
50
51
52 -----------------------------------------------------------------------
53 -- Test perf.
54 distriTest = distributional $ myMat 100
55 -----------------------------------------------------------------------
56
57 vector :: Int -> (Array (Z :. Int) Int)
58 vector n = fromList (Z :. n) [0..n]
59
60 matrix :: Elt c => Int -> [c] -> Matrix c
61 matrix n l = fromList (Z :. n :. n) l
62
63 myMat :: Int -> Matrix Int
64 myMat n = matrix n [1..]
65
66 -- | Two ways to get the rank (as documentation)
67 rank :: (Matrix a) -> Int
68 rank m = arrayRank $ arrayShape m
69
70 rank' :: (Matrix a) -> Int
71 rank' m = n
72 where
73 Z :. _ :. n = arrayShape m
74
75 -----------------------------------------------------------------------
76 -- | Conditional Distance
77
78 type Rank = Int
79
80 proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
81 proba r mat = zipWith (/) mat (mkSum r mat)
82
83 mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
84 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All))
85 $ fold (+) 0 mat
86
87
88 type Matrix' a = Acc (Matrix a)
89 type InclusionExclusion = Double
90 type SpecificityGenericity = Double
91
92
93 miniMax :: Matrix' Double -> Matrix' Double
94 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
95 where
96 miniMax' = (the $ minimum $ maximum m)
97
98 conditional :: Matrix Int -> Matrix Double
99 conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
100 where
101 r :: Rank
102 r = rank' m
103
104
105 {-
106 Metric Specificity and genericty: select terms
107 Compute genericity/specificity:
108 P(j|i) = N(ij) / N(ii)
109 P(i|j) = N(ij) / N(jj)
110
111 Gen(i) = Mean{j} P(j_k|i)
112 Spec(i) = Mean{j} P(i|j_k)
113
114 Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
115 Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
116
117 -}
118
119 incExcSpeGen :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
120 incExcSpeGen m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
121 where
122
123 ie :: Matrix' Double -> Matrix' Double
124 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
125 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
126 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
127
128 n :: Exp Double
129 n = P.fromIntegral r
130
131 r :: Rank
132 r = rank' m
133
134 xs :: Matrix' Double -> Matrix' Double
135 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
136 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
137 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
138
139 -- filter with threshold
140 -----------------------------------------------------------------------
141
142 -- | Distributional Distance
143
144 distributional :: Matrix Int -> Matrix Double
145 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
146 where
147 n = rank' m
148
149 filter m = zipWith (\a b -> max a b) m (transpose m)
150
151 ri mat = zipWith (/) mat1 mat2
152 where
153 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
154 mat2 = mkSum n mat
155
156 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
157 $ zipWith (/) (crossProduct m') (total m')
158
159 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
160
161 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
162 cross mat = zipWith (-) (mkSum n mat) (mat)
163
164
165 int2double :: Matrix Int -> Matrix Double
166 int2double m = run (map fromIntegral $ use m)
167