]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Methods/Similarities/Accelerate/Conditional.hs
[facet] some fixes to tfidfAll, small refactoring
[gargantext.git] / src / Gargantext / Core / Methods / Similarities / Accelerate / Conditional.hs
1 {-|
2 Module : Gargantext.Core.Methods.Similarities.Accelerate.Conditional
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This module aims at implementig distances of terms context by context is
11 the same referential of corpus.
12
13 Implementation use Accelerate library which enables GPU and CPU computation
14 See Gargantext.Core.Methods.Graph.Accelerate)
15
16 -}
17
18 {-# LANGUAGE TypeFamilies #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE ViewPatterns #-}
22
23 module Gargantext.Core.Methods.Similarities.Accelerate.Conditional
24 where
25
26 -- import qualified Data.Foldable as P (foldl1)
27 -- import Debug.Trace (trace)
28 import Data.Array.Accelerate
29 import Data.Array.Accelerate.Interpreter (run)
30 import Gargantext.Core.Methods.Matrix.Accelerate.Utils
31 import Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
32 import qualified Gargantext.Prelude as P
33
34
35 -- * Metrics of proximity
36 -----------------------------------------------------------------------
37 -- ** Conditional distance
38
39 -- *** Conditional distance (basic)
40
41 -- | Conditional distance (basic version)
42 --
43 -- 2 main metrics are actually implemented in order to compute the
44 -- proximity of two terms: conditional and distributional
45 --
46 -- Conditional metric is an absolute metric which reflects
47 -- interactions of 2 terms in the corpus.
48
49 -- Filtered with MiniMax.
50
51 measureConditional :: Matrix Int -> Matrix Double
52 measureConditional m = run $ x $ map fromIntegral $ use m
53 where
54 x :: Acc (Matrix Double) -> Acc (Matrix Double)
55 x mat = matMiniMax $ matProba r mat
56
57 r :: Dim
58 r = dim m
59
60
61 -- | To filter the nodes
62 -- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
63 -- "confidence" , is the maximum probability between @i@ and @j@ to see
64 -- @i@ in the same context of @j@ knowing @j@.
65 --
66 -- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
67 -- in the corpus and _[n_{ij}\] the number of its occurrences we get:
68 --
69 -- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
70 conditional' :: Matrix Int -> (Matrix GenericityInclusion, Matrix SpecificityExclusion)
71 conditional' m = ( run $ ie $ map fromIntegral $ use m
72 , run $ sg $ map fromIntegral $ use m
73 )
74 where
75 x :: Acc (Matrix Double) -> Acc (Matrix Double)
76 x mat = (matProba r mat)
77
78 xs :: Acc (Matrix Double) -> Acc (Matrix Double)
79 xs mat = let mat' = x mat in zipWith (-) (matSumLin r mat') mat'
80 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
81 ys mat = let mat' = x mat in zipWith (-) (matSumCol r mat') mat'
82
83
84 ie :: Acc (Matrix Double) -> Acc (Matrix Double)
85 ie mat = map (\x' -> x' / (2*(n-1))) $ zipWith (+) (xs mat) (ys mat)
86 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
87 sg mat = map (\x' -> x' / (2*(n-1))) $ zipWith (-) (xs mat) (ys mat)
88
89 r :: Dim
90 r = dim m
91
92 n :: Exp Double
93 n = P.fromIntegral r
94