]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
fix the diagonal issue
[gargantext.git] / src / Gargantext / Viz / Phylo / Metrics / Proximity.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Metrics.Proximity
18 where
19
20 import Data.List (null,union,intersect)
21 import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size,filterWithKey)
22 import Gargantext.Prelude
23 -- import Debug.Trace (trace)
24
25 sumInvLog :: Double -> [Double] -> Double
26 sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
27
28 sumLog :: Double -> [Double] -> Double
29 sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
30
31
32 -- -- | To process WeighedLogJaccard distance between to coocurency matrix
33 -- weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
34 -- weightedLogJaccard sens cooc cooc' nbDocs
35 -- | null union' = 0
36 -- | union' == inter' = 1
37 -- | sens == 0 = (fromIntegral $ length $ keysInter) / (fromIntegral $ length $ keysUnion)
38 -- | sens > 0 = (sumInvLog sens $ elems wInter) / (sumInvLog sens $ elems wUnion)
39 -- | otherwise = (sumLog sens $ elems wInter) / (sumLog sens $ elems wUnion)
40 -- where
41 -- --------------------------------------
42 -- keysInter :: [Int]
43 -- keysInter = nub $ concat $ map (\(x,x') -> [x,x']) $ keys inter'
44 -- --------------------------------------
45 -- keysUnion :: [Int]
46 -- keysUnion = nub $ concat $ map (\(x,x') -> [x,x']) $ keys union'
47 -- --------------------------------------
48 -- wInter :: Map (Int,Int) Double
49 -- wInter = map (/nbDocs) inter'
50 -- --------------------------------------
51 -- wUnion :: Map (Int,Int) Double
52 -- wUnion = map (/nbDocs) union'
53 -- --------------------------------------
54 -- inter' :: Map (Int, Int) Double
55 -- inter' = intersectionWith (+) cooc cooc'
56 -- --------------------------------------
57 -- union' :: Map (Int, Int) Double
58 -- union' = unionWith (+) cooc cooc'
59 -- --------------------------------------
60
61
62 -- | To compute a jaccard similarity between two lists
63 jaccard :: [Int] -> [Int] -> Double
64 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
65
66
67 -- | To get the diagonal of a matrix
68 toDiago :: Map (Int, Int) Double -> [Double]
69 toDiago cooc = elems $ filterWithKey (\(x,x') _ -> x == x') cooc
70
71
72 -- | To process WeighedLogJaccard distance between to coocurency matrix
73 weightedLogJaccard :: Double -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
74 weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
75 | null gInter = 0
76 | gInter == gUnion = 1
77 | sens == 0 = jaccard gInter gUnion
78 | sens > 0 = (sumInvLog sens wInter) / (sumInvLog sens wUnion)
79 | otherwise = (sumLog sens wInter) / (sumLog sens wUnion)
80 where
81 --------------------------------------
82 gInter :: [Int]
83 gInter = intersect ngrams ngrams'
84 --------------------------------------
85 gUnion :: [Int]
86 gUnion = union ngrams ngrams'
87 --------------------------------------
88 wInter :: [Double]
89 wInter = toDiago $ map (/nbDocs) $ intersectionWith (+) cooc cooc'
90 --------------------------------------
91 wUnion :: [Double]
92 wUnion = toDiago $ map (/nbDocs) $ unionWith (+) cooc cooc'
93 --------------------------------------
94
95
96
97 -- | To process the Hamming distance between two PhyloGroup fields
98 hamming :: Map (Int, Int) Double -> Map (Int, Int) Double -> Double
99 hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (size f2))
100 where
101 --------------------------------------
102 inter :: Map (Int, Int) Double
103 inter = intersection f1 f2
104 --------------------------------------