]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Methods/Distances/Accelerate/Conditional.hs
[ngrams] add score update endpoint + sorting
[gargantext.git] / src / Gargantext / Core / Methods / Distances / Accelerate / Conditional.hs
1 {-|
2 Module : Gargantext.Core.Methods.Distances.Accelerate.Conditional
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This module aims at implementig distances of terms context by context is
11 the same referential of corpus.
12
13 Implementation use Accelerate library which enables GPU and CPU computation
14 See Gargantext.Core.Methods.Graph.Accelerate)
15
16 -}
17
18 {-# LANGUAGE TypeFamilies #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE ViewPatterns #-}
22
23 module Gargantext.Core.Methods.Distances.Accelerate.Conditional
24 where
25
26 -- import qualified Data.Foldable as P (foldl1)
27 -- import Debug.Trace (trace)
28 import Data.Array.Accelerate
29 import Data.Array.Accelerate.Interpreter (run)
30 import Gargantext.Core.Methods.Matrix.Accelerate.Utils
31 import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
32 import qualified Gargantext.Prelude as P
33
34
35 -- * Metrics of proximity
36 -----------------------------------------------------------------------
37 -- ** Conditional distance
38
39 -- *** Conditional distance (basic)
40
41 -- | Conditional distance (basic version)
42 --
43 -- 2 main metrics are actually implemented in order to compute the
44 -- proximity of two terms: conditional and distributional
45 --
46 -- Conditional metric is an absolute metric which reflects
47 -- interactions of 2 terms in the corpus.
48 measureConditional :: Matrix Int -> Matrix Double
49 measureConditional m = run $ zipWith (/) m' (matSumCol d m')
50 where
51 m' = map fromIntegral (use m)
52 d = dim m
53
54
55 -- *** Conditional distance (advanced)
56
57 -- | Conditional distance (advanced version)
58 --
59 -- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
60 -- "confidence" , is the maximum probability between @i@ and @j@ to see
61 -- @i@ in the same context of @j@ knowing @j@.
62 --
63 -- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
64 -- in the corpus and _[n_{ij}\] the number of its occurrences we get:
65 --
66 -- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
67 conditional' :: Matrix Int -> (Matrix GenericityInclusion, Matrix SpecificityExclusion)
68 conditional' m = ( run $ ie $ map fromIntegral $ use m
69 , run $ sg $ map fromIntegral $ use m
70 )
71 where
72 ie :: Acc (Matrix Double) -> Acc (Matrix Double)
73 ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
74 sg :: Acc (Matrix Double) -> Acc (Matrix Double)
75 sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
76
77 n :: Exp Double
78 n = P.fromIntegral r
79
80 r :: Dim
81 r = dim m
82
83 xs :: Acc (Matrix Double) -> Acc (Matrix Double)
84 xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
85 ys :: Acc (Matrix Double) -> Acc (Matrix Double)
86 ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
87