2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.Aggregates.Cluster
20 import Data.List (null,tail,concat,sort,intersect)
22 import Data.Tuple (fst)
23 import Gargantext.Prelude
24 import Gargantext.Viz.Phylo
25 import Gargantext.Viz.Phylo.Tools
26 import Gargantext.Viz.Phylo.Metrics.Proximity
27 import Gargantext.Viz.Phylo.Metrics.Clustering
28 import Gargantext.Viz.Phylo.Aggregates.Cooc
29 import qualified Data.Map as Map
31 import qualified Data.Vector.Storable as VS
32 import Debug.Trace (trace)
33 import Numeric.Statistics (percentile)
36 -- | Optimisation to filter only relevant candidates
37 getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
38 getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
39 $ filter (\(g,g') -> g /= g')
40 $ listToDirectedCombi gs
43 -- | To transform a Graph into Clusters
44 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
45 graphToClusters clust (nodes,edges) = case clust of
46 Louvain (LouvainParams _) -> undefined
47 RelatedComponents (RCParams _) -> relatedComp 0 (head' "graphToClusters" nodes) (tail nodes,edges) [] []
48 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
51 -- | To transform a list of PhyloGroups into a Graph of Proximity
52 groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> Phylo -> ([GroupNode],[GroupEdge])
53 groupsToGraph prox gs cooc p = case prox of
54 WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
55 $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
57 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
62 -- | To filter a Graph of Proximity using a given threshold
63 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
64 filterGraph prox (ns,es) = case prox of
65 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
66 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
70 -- | To clusterise a Phylo
71 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
72 phyloToClusters lvl clus p = Map.fromList
74 $ map (\g -> if null (fst g)
76 else graphToClusters clus g) graphs'
78 --------------------------------------
79 graphs' :: [([GroupNode],[GroupEdge])]
80 graphs' = traceGraphFiltered lvl
81 $ map (\g -> filterGraph prox g) graphs
82 --------------------------------------
83 graphs :: [([GroupNode],[GroupEdge])]
84 graphs = traceGraph lvl (getThreshold prox)
85 $ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p) p) periods
86 --------------------------------------
88 prox = getProximity clus
89 --------------------------------------
90 periods :: [PhyloPeriodId]
91 periods = getPhyloPeriods p
92 --------------------------------------
100 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
101 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
102 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
103 <> show (lst) <> "\n"
104 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
105 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
106 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
107 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
109 lst = sort $ map snd $ concat $ map snd g
112 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
113 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
114 <> "count : " <> show (length lst) <> " edges\n"
115 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
116 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
117 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
118 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
120 lst = sort $ map snd $ concat $ map snd g
123 traceSim :: PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> Map (Int, Int) Double -> Phylo -> Double -> Double
124 traceSim g g' _ _ p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n"
125 -- <> show (c) <> " [vs] " <> show (c') <> " = " <> show (sim)