[FIX] ngrams + ngramsPosTag insert
[gargantext.git] / src / Gargantext / Core / Methods / Distances / Accelerate / SpeGen.hs
1 {-|
2 Module : Gargantext.Core.Methods.Distances.Accelerate.SpeGen
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This module aims at implementing distances between terms, context by context,
11 within the same referential of the corpus.
12
13 The implementation uses the Accelerate library, which enables GPU and CPU
14 computation (see Gargantext.Core.Methods.Graph.Accelerate).
15
16 -}
17
18 {-# LANGUAGE TypeFamilies #-}
19 {-# LANGUAGE TypeOperators #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE ViewPatterns #-}
22
23 module Gargantext.Core.Methods.Distances.Accelerate.SpeGen
24 where
25
26 -- import qualified Data.Foldable as P (foldl1)
27 -- import Debug.Trace (trace)
28 import Data.Array.Accelerate
29 import Data.Array.Accelerate.Interpreter (run)
30 import Gargantext.Core.Methods.Matrix.Accelerate.Utils
31 import qualified Gargantext.Prelude as P
32
33
34 -----------------------------------------------------------------------
35 -----------------------------------------------------------------------
36 -- * Specificity and Genericity
37
38 {- | Metric Specificity and genericity: select terms
39
40 - Let N be the number of terms and \[N_{i}\] the number of occurrences of term i
41
42 - Cooccurrences of i and j: \[N_{ij}\]
43 - Probability to get i given j: \[P(i|j) = N_{ij} / N_{j}\]
44
45 - Genericity of i: \[Gen(i) = \frac{\sum_{j \neq i} P(i|j)}{N-1}\]
46 - Specificity of i: \[Spec(i) = \frac{\sum_{j \neq i} P(j|i)}{N-1}\]
47
48 - \[Inclusion(i) = Gen(i) + Spec(i)\]
49 - \[GenericityScore = Gen(i)- Spec(i)\]
50
51 - References: Science mapping with asymmetrical paradigmatic proximity
52 Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
53 on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
54 arXiv:0803.2315 [cs.OH]
55 -}
-- Element type of the first score vector returned by 'incExcSpeGen'.
type GenericityInclusion  = Double

-- | Element type of the second score vector returned by 'incExcSpeGen'.
type SpecificityExclusion = Double

-- | Tag naming the two kinds of square matrix.
--
-- NOTE(review): not referenced by the code visible in this module; its
-- constructors only share their names with the aliases below.
data SquareMatrix = SymetricMatrix | NonSymetricMatrix

-- | Plain alias of 'Matrix'; it documents intent in signatures and adds no
-- checking, since the compiler treats it exactly as 'Matrix'.
type SymetricMatrix    = Matrix

-- | Plain alias of 'Matrix', counterpart of 'SymetricMatrix'.
type NonSymetricMatrix = Matrix
62
63
-- | Compute the two per-term score vectors from a square matrix of raw
-- cooccurrence counts.
--
-- The counts are converted to 'Double'; each score is then built as an
-- Accelerate program and evaluated with the interpreter backend ('run').
--
-- As written, the first vector holds @pV + pV@ (twice @Gen(i)@) and the
-- second holds @pH + pH@ (twice @Spec(i)@).
--
-- NOTE(review): the formulas quoted for this metric are
-- @Inclusion(i) = Gen(i) + Spec(i)@ and @GenericityScore = Gen(i) - Spec(i)@,
-- which is not what is computed here. Confirm with the callers which
-- behaviour is intended before changing either the code or the formulas.
incExcSpeGen :: Matrix Int
             -> ( Vector GenericityInclusion
                , Vector SpecificityExclusion
                )
incExcSpeGen m = (evaluate inclusionExclusion, evaluate specificityGenericity)
  where
    -- Embed the input matrix, convert its counts to Double, apply the given
    -- score and run the resulting program.
    evaluate :: (Acc (Matrix Double) -> Acc (Vector Double)) -> Vector Double
    evaluate score = run (score (map fromIntegral (use m)))

    -- First component: pV added to itself.
    inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
    inclusionExclusion mat = zipWith (+) gen gen
      where
        gen = pV mat

    -- Second component: pH added to itself.
    specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
    specificityGenericity mat = zipWith (+) spe spe
      where
        spe = pH mat

    -- Gen(i) = 1/(N-1) * Sum(j /= i, P(i|j)) : genericity of i
    pV :: Acc (Matrix Double) -> Acc (Vector Double)
    pV = rowScore . p_ij

    -- Spec(i) = 1/(N-1) * Sum(j /= i, P(j|i)) : specificity of i
    pH :: Acc (Matrix Double) -> Acc (Vector Double)
    pH = rowScore . p_ji

    -- Sum a probability matrix with Accelerate's 'sum' and normalise every
    -- total.
    rowScore :: Acc (Matrix Double) -> Acc (Vector Double)
    rowScore probs = map normalise (sum probs)

    -- Subtract 1 from the total (the j == i term: a diagonal cell divided
    -- by itself) and average over the N-1 remaining terms.
    -- NOTE(review): no guard for N == 1, where the divisor is zero.
    normalise :: Exp Double -> Exp Double
    normalise total = (total - 1) / (cardN - 1)

    -- N, taken from 'dim' of the input matrix, as an embedded constant.
    cardN :: Exp Double
    cardN = constant (P.fromIntegral (dim m) :: Double)
90
91
-- | Conditional probability of @i@ given @j@: @P(i|j) = N_ij / N_jj@.
--
-- Every cell @(i, j)@ of the input is divided by the diagonal cell @(j, j)@
-- of the same matrix. The divisor matrix is built with 'backpermute', which
-- reads index @(j, j)@ for every output index @(i, j)@.
--
-- NOTE(review): there is no guard against a zero diagonal entry.
p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
p_ij m = zipWith (/) m (backpermute (shape m) (lift1 diagonalOfColumn) m)
  where
    -- Send the index (_, j) to the diagonal index (j, j).
    diagonalOfColumn :: (Z :. Exp Int :. Exp Int) -> (Z :. Exp Int :. Exp Int)
    diagonalOfColumn (Z :. _ :. j) = Z :. j :. j
103
-- | Conditional probability of @j@ given @i@: @P(j|i) = N_ij / N_ii@.
--
-- Obtained by transposing the result of 'p_ij', so cell @(i, j)@ holds the
-- value that 'p_ij' stores at @(j, i)@, i.e. input cell @(j, i)@ divided by
-- the diagonal cell @(i, i)@.
--
-- Marked by the original author as still to be tested.
p_ji :: (Elt e, P.Fractional (Exp e))
     => Acc (Array DIM2 e)
     -> Acc (Array DIM2 e)
p_ji counts = transpose (p_ij counts)
110
111
-- | Intermediate step exposed to check the result in visual / qualitative
-- tests: the probability matrix produced by 'p_ji' for the given counts.
--
-- The integer counts are converted to 'Double' before 'p_ji' is applied,
-- and the program is evaluated with the interpreter backend ('run').
incExcSpeGen_proba :: Matrix Int -> Matrix Double
incExcSpeGen_proba m = run (p_ji asDoubles)
  where
    -- The input matrix embedded in Accelerate, with Double entries.
    asDoubles :: Acc (Matrix Double)
    asDoubles = map fromIntegral (use m)
119
120 {-
121 -- | Hypothesis to test maybe later (or not)
122 -- TODO ask accelerate for instances to ease such writtings:
123 p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
124 p_ m = zipWith (/) m (n_ m)
125 where
126 n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
127 n_ m = backpermute (shape m)
128 (lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
129 -> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
130 )
131 ) m
132 -}
133