]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Graph/Distances/Matrice.hs
[Pipeline] clustering with C++ Louvain bindings, ok.
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Matrice.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Matrix
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11
12 Implementation use Accelerate library :
13 * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
14 [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
15 In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
16
17 * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
18 [Optimising Purely Functional GPU Programs][MCKL13].
19 In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
20
21 * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
22 [Embedding Foreign Code][CMCK14].
23 In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
24
25 * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
26 [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
27 In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
28
29 -}
30
31 {-# LANGUAGE NoImplicitPrelude #-}
32 {-# LANGUAGE FlexibleContexts #-}
33 {-# LANGUAGE TypeFamilies #-}
34 {-# LANGUAGE TypeOperators #-}
35
36 module Gargantext.Viz.Graph.Distances.Matrice
37 where
38
39 import Data.Array.Accelerate
40 import Data.Array.Accelerate.Interpreter (run)
41 import Data.Array.Accelerate.Smart
42 import Data.Array.Accelerate.Type
43 import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
44
45 import Data.Maybe (Maybe(Just))
46 import qualified Gargantext.Prelude as P
47 import qualified Data.Array.Accelerate.Array.Representation as Repr
48
49 import Gargantext.Text.Metrics.Occurrences
50
51
52 -----------------------------------------------------------------------
53 -- Test perf.
54 distriTest = distributional $ myMat 100
55 -----------------------------------------------------------------------
56
57 vector :: Int -> (Array (Z :. Int) Int)
58 vector n = fromList (Z :. n) [0..n]
59
60 matrix :: Elt c => Int -> [c] -> Matrix c
61 matrix n l = fromList (Z :. n :. n) l
62
63 myMat :: Int -> Matrix Int
64 myMat n = matrix n [1..]
65
66 -- | Two ways to get the rank (as documentation)
67 rank :: (Matrix a) -> Int
68 rank m = arrayRank $ arrayShape m
69
70 rank' :: (Matrix a) -> Int
71 rank' m = n
72 where
73 Z :. _ :. n = arrayShape m
74
75 -----------------------------------------------------------------------
76 -- | Conditional Distance
77
78 type Rank = Int
79
80 proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
81 proba r mat = zipWith (/) mat (mkSum r mat)
82
83 mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
84 mkSum r mat = replicate (constant (Z :. (r :: Int) :. All))
85 $ fold (+) 0 mat
86
87
88 type Matrix' a = Acc (Matrix a)
89 type InclusionExclusion = Double
90 type SpecificityGenericity = Double
91
92
93 miniMax :: Matrix' Double -> Matrix' Double
94 miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
95 where
96 miniMax' = (the $ minimum $ maximum m)
97
98 conditional :: Matrix Int -> Matrix Double
99 conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
100 where
101 r :: Rank
102 r = rank' m
103
104
105 conditional' :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
106 conditional' m = (run $ ie (use m), run $ sg (use m))
107 where
108
109 ie :: Matrix' Double -> Matrix' Double
110 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
111 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
112 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
113
114 n :: Exp Double
115 n = P.fromIntegral r
116
117 r :: Rank
118 r = rank' m
119
120 xs :: Matrix' Double -> Matrix' Double
121 xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
122 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
123 ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
124
125 -- filter with threshold
126 -----------------------------------------------------------------------
127
128 -- | Distributional Distance
129
130 distributional :: Matrix Int -> Matrix Double
131 distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
132 where
133 n = rank' m
134
135 filter m = zipWith (\a b -> max a b) m (transpose m)
136
137 ri mat = zipWith (/) mat1 mat2
138 where
139 mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
140 mat2 = mkSum n mat
141
142 mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
143 $ zipWith (/) (crossProduct m') (total m')
144
145 total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
146
147 crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
148 cross mat = zipWith (-) (mkSum n mat) (mat)
149
150
151 int2double :: Matrix Int -> Matrix Double
152 int2double m = run (map fromIntegral $ use m)
153