[FIX] Score by Doc or Corpus.
[gargantext.git] / src / Gargantext / Viz / Graph / Distances / Conditional.hs
1 {-|
2 Module : Gargantext.Graph.Distances.Conditional
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Motivation and definition of the @Conditional@ distance.
11 -}
12
13 {-# LANGUAGE BangPatterns #-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE FlexibleContexts #-}
16 {-# LANGUAGE Strict #-}
17 module Gargantext.Viz.Graph.Distances.Conditional
18 where
19
20 import Data.Matrix hiding (identity)
21
22 import Data.List (sortOn)
23
24 import Data.Map (Map)
25 import qualified Data.Map as M
26
27 import qualified Data.Set as S
28
29 import qualified Data.Vector as V
30
31 import Gargantext.Prelude
32 import Gargantext.Viz.Graph.Utils
33
34 ------------------------------------------------------------------------
35 ------------------------------------------------------------------------
36 -- | Optimisation issue
37
38 toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
39 toBeOptimized m = proba Col m
40
41 ------------------------------------------------------------------------
42 -- | Main Functions
43 -- Compute the probability from axis
44 -- x' = x / (sum Col x)
45 proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
46 proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
47
48
49 mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
50 mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
51 where
52 f' m' c = mapOnly a f c m'
53
54 mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
55 mapOnly Col = mapCol
56 mapOnly Row = mapRow
57
58 mapAll :: (a -> a) -> Matrix a -> Matrix a
59 mapAll f m = mapOn Col (\_ -> f) m
60
61
62 ---------------------------------------------------------------
63 -- | Compute a distance from axis
64 -- xs = (sum Col x') - x'
65 distFromSum :: (Num a, Fractional a)
66 => Axis -> Matrix a -> Matrix a
67 distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
68 ---------------------------------------------------------------
69 ---------------------------------------------------------------
70 -- | To compute included/excluded or specific/generic scores
71 opWith :: (Fractional a1, Num a1)
72 => (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
73 opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
74 where
75 n = fromIntegral $ nOf Col xs
76 ---------------------------------------------------------------
77
78
79 -------------------------------------------------------
80 conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
81 conditional m = filterMat (threshold m') m'
82 where
83 ------------------------------------------------------------------------
84 -- | Main Operations
85 -- x' = x / (sum Col x)
86 x' = proba Col m
87
88 ------------------------------------------------------------------------
89 -- xs = (sum Col x') - x'
90 xs = distFromSum Col x'
91 -- ys = (sum Row x') - x'
92 ys = distFromSum Row x'
93
94 ------------------------------------------------------------------------
95 -- | Top included or excluded
96 ie = opWith (+) xs ys
97 -- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
98
99 -- | Top specific or generic
100 sg = opWith (-) xs ys
101 -- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
102
103 nodes_kept :: [Int]
104 nodes_kept = take k' $ S.toList
105 $ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
106 $ map fst
107 $ nodes_included k <> nodes_specific k
108
109 nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
110 nodes_specific n = take n $ sortOn snd $ toListsWithIndex sg
111 insert as s = foldl' (\s' a -> S.insert a s') s as
112 k' = 2*k
113 k = 10
114
115 dico_nodes :: Map Int Int
116 dico_nodes = M.fromList $ zip ([1..] :: [Int]) nodes_kept
117 --dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
118
119 m' = matrix (length nodes_kept)
120 (length nodes_kept)
121 (\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
122
123 threshold m'' = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m'') (V.enumFromTo 1 (nOf Col m''))
124
125 filterMat t m'' = mapAll (\x -> filter' t x) m''
126 where
127 filter' t' x = case (x >= t') of
128 True -> x
129 False -> 0
130
131 ------------------------------------------------------------------------