]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
add rebranching to link distante branches
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Cluster.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Cluster
18 where
19
20 import Data.List (null,tail,concat,sort,intersect)
21 import Data.Map (Map)
22 import Data.Tuple (fst)
23 import Gargantext.Prelude
24 import Gargantext.Viz.Phylo
25 import Gargantext.Viz.Phylo.Tools
26 import Gargantext.Viz.Phylo.Metrics.Proximity
27 import Gargantext.Viz.Phylo.Metrics.Clustering
28 import Gargantext.Viz.Phylo.LinkMaker
29 import qualified Data.Map as Map
30
31 import qualified Data.Vector.Storable as VS
32 import Debug.Trace (trace)
33 import Numeric.Statistics (percentile)
34
35
36 -- | Optimisation to filter only relevant candidates
37 getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
38 getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
39 $ filter (\(g,g') -> g /= g')
40 $ listToDirectedCombi gs
41
42
43 -- | To transform a Graph into Clusters
44 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
45 graphToClusters clust (nodes,edges) = case clust of
46 Louvain (LouvainParams _) -> undefined
47 RelatedComponents (RCParams _) -> relatedComp 0 (head' "graphToClusters" nodes) (tail nodes,edges) [] []
48 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
49
50
51 -- | To transform a list of PhyloGroups into a Graph of Proximity
52 groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
53 groupsToGraph nbDocs prox gs = case prox of
54 WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard sens (getGroupCooc x) (getGroupCooc y) nbDocs))
55 $ getCandidates gs)
56 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
57 _ -> undefined
58
59
60 -- | To filter a Graph of Proximity using a given threshold
61 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
62 filterGraph prox (ns,es) = case prox of
63 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
64 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
65 _ -> undefined
66
67
68 -- | To clusterise a Phylo
69 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
70 phyloToClusters lvl clus p = Map.fromList
71 $ zip periods
72 $ map (\g -> if null (fst g)
73 then []
74 else graphToClusters clus g) graphs'
75 where
76 --------------------------------------
77 graphs' :: [([GroupNode],[GroupEdge])]
78 graphs' = traceGraphFiltered lvl
79 $ map (\g -> filterGraph prox g) graphs
80 --------------------------------------
81 graphs :: [([GroupNode],[GroupEdge])]
82 graphs = traceGraph lvl (getThreshold prox)
83 $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
84 --------------------------------------
85 prox :: Proximity
86 prox = getProximity clus
87 --------------------------------------
88 periods :: [PhyloPeriodId]
89 periods = getPhyloPeriods p
90 --------------------------------------
91
92
93 ----------------
94 -- | Tracer | --
95 ----------------
96
97
98 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
99 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
100 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
101 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
102 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
103 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
104 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
105 where
106 lst = sort $ map snd $ concat $ map snd g
107
108
109 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
110 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
111 <> "count : " <> show (length lst) <> " edges\n"
112 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
113 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
114 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
115 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
116 where
117 lst = sort $ map snd $ concat $ map snd g
118
119
120 -- traceSim :: PhyloGroup -> PhyloGroup -> Phylo -> Double -> Double
121 -- traceSim g g' p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n") sim