]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
add trace to cluster and optimisation to find candidates
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Cluster.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Cluster
18 where
19
20 import Data.List (null,tail,concat)
21 import Data.Map (Map)
22 import Data.Tuple (fst)
23 import Gargantext.Prelude
24 import Gargantext.Viz.Phylo
25 import Gargantext.Viz.Phylo.Tools
26 import Gargantext.Viz.Phylo.Metrics.Proximity
27 import Gargantext.Viz.Phylo.Metrics.Clustering
28 import qualified Data.Map as Map
29
30 import qualified Data.Vector.Storable as VS
31 import Debug.Trace (trace)
32 import Numeric.Statistics (percentile)
33
34
35 -- | To transform a Graph into Clusters
36 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
37 graphToClusters clust (nodes,edges) = case clust of
38 Louvain (LouvainParams _) -> undefined
39 RelatedComponents (RCParams _) -> relatedComp 0 (head' "graphToClusters" nodes) (tail nodes,edges) [] []
40 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
41
42
43 -- | To transform a list of PhyloGroups into a Graph of Proximity
44 groupsToGraph :: Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
45 groupsToGraph prox gs = case prox of
46 WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard sens (getGroupCooc x) (getGroupCooc y)))
47 $ listToDirectedCombi gs)
48 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y)))
49 $ listToDirectedCombi gs)
50 _ -> undefined
51
52
53 -- | To filter a Graph of Proximity using a given threshold
54 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
55 filterGraph prox (ns,es) = case prox of
56 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
57 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
58 _ -> undefined
59
60
61 -- | To clusterise a Phylo
62 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
63 phyloToClusters lvl clus p = Map.fromList
64 $ zip periods
65 $ map (\g -> if null (fst g)
66 then []
67 else graphToClusters clus g) graphs'
68 where
69 --------------------------------------
70 graphs' :: [([GroupNode],[GroupEdge])]
71 graphs' = traceGraphFiltered lvl
72 $ map (\g -> filterGraph prox g) graphs
73 --------------------------------------
74 graphs :: [([GroupNode],[GroupEdge])]
75 graphs = traceGraph lvl (getThreshold prox)
76 $ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p)) periods
77 --------------------------------------
78 prox :: Proximity
79 prox = getProximity clus
80 --------------------------------------
81 periods :: [PhyloPeriodId]
82 periods = getPhyloPeriods p
83 --------------------------------------
84
85
86 ----------------
87 -- | Tracer | --
88 ----------------
89
90
91 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
92 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
93 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
94 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
95 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
96 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
97 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
98 where
99 lst = map snd $ concat $ map snd g
100
101
102 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
103 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
104 <> "count : " <> show (length lst) <> " edges\n"
105 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
106 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
107 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
108 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
109 where
110 lst = map snd $ concat $ map snd g
111