2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE FlexibleContexts #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15 {-# LANGUAGE ViewPatterns #-}
17 module Gargantext.Viz.Phylo.Metrics
20 import Gargantext.Prelude
22 import Data.List ((\\), sortOn, concat, nub, take, union, intersect, null)
23 import Data.Map (Map, foldlWithKey, toList, size, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault)
24 import Data.Text (Text)
26 -- import Debug.Trace (trace)
33 -- | Return the conditional probability of i knowing j
34 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
35 conditional m i j = (findWithDefault 0 (i,j) m)
36 / foldlWithKey (\s (x,_) v -> if x == j
41 -- | Return the genericity score of a given ngram
42 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
43 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
44 - (sum $ map (\j -> conditional m j i) l)) / 2
47 -- | Return the specificity score of a given ngram
48 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
49 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
50 - (sum $ map (\j -> conditional m i j) l)) / 2
53 -- | Return the coverage score of a given ngram
54 coverage :: Map (Int, Int) Double -> [Int] -> Int -> Double
55 coverage m l i = ( (sum $ map (\j -> conditional m j i) l)
56 + (sum $ map (\j -> conditional m i j) l)) / 2
59 -- | Process some metrics on top of ngrams
60 getNgramsMeta :: Map (Int, Int) Double -> [Int] -> Map Text [Double]
61 getNgramsMeta m ngrams = fromList
62 [ ("genericity" , map (\n -> genericity m (ngrams \\ [n]) n) ngrams ),
63 ("specificity", map (\n -> specificity m (ngrams \\ [n]) n) ngrams ),
64 ("coverage" , map (\n -> coverage m (ngrams \\ [n]) n) ngrams )]
67 -- | To get the nth most occurent elems in a coocurency matrix
68 getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
69 getNthMostOcc nth cooc = (nub . concat)
70 $ map (\((idx,idx'),_) -> [idx,idx'])
73 $ sortOn snd $ toList cooc
76 -------------------------
77 -- | Ngrams Dynamics | --
78 -------------------------
86 -- | Process the inverse sumLog
87 sumInvLog :: Double -> [Double] -> Double
88 sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
91 -- | Process the sumLog
92 sumLog :: Double -> [Double] -> Double
93 sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
96 -- | To compute a jaccard similarity between two lists
97 jaccard :: [Int] -> [Int] -> Double
98 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
101 -- | To get the diagonal of a matrix
102 toDiago :: Map (Int, Int) Double -> [Double]
103 toDiago cooc = elems $ filterWithKey (\(x,x') _ -> x == x') cooc
106 -- | To process WeighedLogJaccard distance between to coocurency matrix
107 weightedLogJaccard :: Double -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
108 weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
110 | gInter == gUnion = 1
111 | sens == 0 = jaccard gInter gUnion
112 | sens > 0 = (sumInvLog sens wInter) / (sumInvLog sens wUnion)
113 | otherwise = (sumLog sens wInter) / (sumLog sens wUnion)
115 --------------------------------------
117 gInter = intersect ngrams ngrams'
118 --------------------------------------
120 gUnion = union ngrams ngrams'
121 --------------------------------------
123 wInter = toDiago $ map (/nbDocs) $ intersectionWith (+) cooc cooc'
124 --------------------------------------
126 wUnion = toDiago $ map (/nbDocs) $ unionWith (+) cooc cooc'
127 --------------------------------------
130 -- | To process the Hamming distance between two PhyloGroup fields
131 hamming :: Map (Int, Int) Double -> Map (Int, Int) Double -> Double
132 hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (size f2))
134 --------------------------------------
135 inter :: Map (Int, Int) Double
136 inter = intersection f1 f2
137 --------------------------------------