2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE TemplateHaskell #-}
15 module Gargantext.Core.Viz.Phylo.Cluster
17 import Control.Parallel.Strategies
18 import Data.Graph.Clustering.Louvain.CplusPlus
19 import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
20 import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
21 import Data.Map (Map, fromList, mapKeys)
22 import Gargantext.Prelude
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Core.Viz.Phylo.Tools
25 import Gargantext.Core.Viz.Phylo.Metrics
26 import Gargantext.Core.Viz.Phylo.LinkMaker
27 import qualified Data.Map as Map
29 import qualified Data.Vector.Storable as VS
30 import Debug.Trace (trace)
31 import Numeric.Statistics (percentile)
39 relatedComp :: Eq a => [[a]] -> [[a]]
40 relatedComp graphs = foldl' (\mem groups ->
44 let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
47 else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
50 louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
51 louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
52 <$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
53 <$> (cLouvain "0.0001" $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
55 --------------------------------------
56 idx :: PhyloGroup -> Int
57 idx e = case elemIndex e nodes of
58 Nothing -> panic "[ERR][Gargantext.Core.Viz.Phylo.Metrics.Clustering] a node is missing"
60 --------------------------------------
63 -----------------------
64 -- | Cluster Maker | --
65 -----------------------
68 -- | Optimisation to filter only relevant candidates
69 getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
70 getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
71 $ filter (\(g,g') -> g /= g')
72 $ listToDirectedCombi gs
75 -- | To transform a Graph into Clusters
76 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
77 graphToClusters clust (nodes,edges) = case clust of
78 Louvain (LouvainParams _) -> undefined
79 RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
80 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
83 -- | To transform a list of PhyloGroups into a Graph of Proximity
84 groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
85 groupsToGraph nbDocs prox gs = case prox of
86 WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
88 candidates' = candidates `using` parList rdeepseq
90 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
94 -- | To filter a Graph of Proximity using a given threshold
95 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
96 filterGraph prox (ns,es) = case prox of
97 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
98 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
102 -- | To clusterise a Phylo
103 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
104 phyloToClusters lvl clus p = Map.fromList
106 $ map (\g -> if null (fst g)
108 else graphToClusters clus g) graphs'
110 --------------------------------------
111 graphs' :: [([GroupNode],[GroupEdge])]
112 graphs' = traceGraphFiltered lvl
113 $ map (\g -> filterGraph prox g) graphs
114 --------------------------------------
115 graphs :: [([GroupNode],[GroupEdge])]
116 graphs = traceGraph lvl (getThreshold prox)
117 $ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
118 gs' = gs `using` parList rdeepseq
120 --------------------------------------
122 prox = getProximity clus
123 --------------------------------------
124 periods :: [PhyloPeriodId]
125 periods = getPhyloPeriods p
126 --------------------------------------
134 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
135 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
136 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
137 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
138 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
139 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
140 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
142 lst = sort $ map snd $ concat $ map snd g
145 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
146 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
147 <> "count : " <> show (length lst) <> " edges\n"
148 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
149 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
150 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
151 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
153 lst = sort $ map snd $ concat $ map snd g