]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[Pipeline] ok until clustering.
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35
36 module Gargantext.Viz.Graph.Distances.Matrice
37 where
38
39 import Data.Array.Accelerate
40 import Data.Array.Accelerate.Interpreter (run)
41 import Data.Array.Accelerate.Smart
42 import Data.Array.Accelerate.Type
43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
44
45 import Data.Maybe (Maybe(Just))
46 import qualified Gargantext.Prelude as P
47 import qualified Data.Array.Accelerate.Array.Representation as Repr
48
49 import Gargantext.Text.Metrics.Occurrences
50
51
52 -----------------------------------------------------------------------
53 -- Test perf.
54 distriTest = distributional $ myMat 100
55 -----------------------------------------------------------------------
56
57 vector :: Int -> (Array (Z :. Int) Int)
58 vector n = fromList (Z :. n) [0..n]
59
60 matrix :: Elt c => Int -> [c] -> Matrix c
61 matrix n l = fromList (Z :. n :. n) l
62
63 myMat :: Int -> Matrix Int
64 myMat n = matrix n [1..]
65
66 -- | Two ways to get the rank (as documentation)
67 rank :: (Matrix a) -> Int
68 rank m = arrayRank $ arrayShape m
69
70 rank' :: (Matrix a) -> Int
71 rank' m = n
72 where
73 Z :. _ :. n = arrayShape m
74
75 -----------------------------------------------------------------------
76 -- | Conditional Distance
77
78 type Rank = Int
79
80 proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
81 proba r mat = zipWith (/) mat (mkSum r mat)
82
83 mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
84 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All))
85 $ fold (+) 0 mat
86
87
88 type Matrix' a = Acc (Matrix a)
89 type InclusionExclusion = Double
90 type SpecificityGenericity = Double
91
92 conditional :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
93 conditional m = (run $ ie (use m), run $ sg (use m))
94 where
95
96 ie :: Matrix' Double -> Matrix' Double
97 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
98 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
99 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
100
101 n :: Exp Double
102 n = P.fromIntegral r
103
104 r :: Rank
105 r = rank' m
106
107 xs :: Matrix' Double -> Matrix' Double
108 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
109 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
110 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
111
112 -- filter with threshold
113 -----------------------------------------------------------------------
114
115 -- | Distributional Distance
116
117 distributional :: Matrix Int -> Matrix Double
118 distributional m = run $ filter $ ri (map fromIntegral $ use m)
119 where
120 n = rank' m
121
122 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
123 where
124 miniMax' = (the $ minimum $ maximum m)
125
126 filter m = zipWith (\a b -> max a b) m (transpose m)
127
128 ri mat = zipWith (/) mat1 mat2
129 where
130 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
131 mat2 = mkSum n mat
132
133 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
134 $ zipWith (/) (crossProduct m') (total m')
135
136 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
137
138 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
139 cross mat = zipWith (-) (mkSum n mat) (mat)
140
141
142 int2double :: Matrix Int -> Matrix Double
143 int2double m = run (map fromIntegral $ use m)
144