2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TemplateHaskell #-}
18 module Gargantext.Viz.Phylo.Cluster
20 import Control.Parallel.Strategies
21 import Data.Graph.Clustering.Louvain.CplusPlus
22 import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
23 import Data.Map (Map, fromList, mapKeys)
24 import Data.Tuple (fst)
25 import Gargantext.Prelude
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Tools
28 import Gargantext.Viz.Phylo.Metrics
29 import Gargantext.Viz.Phylo.LinkMaker
30 import qualified Data.Map as Map
32 import qualified Data.Vector.Storable as VS
33 import Debug.Trace (trace)
34 import Numeric.Statistics (percentile)
42 relatedComp :: Eq a => [[a]] -> [[a]]
43 relatedComp graphs = foldl' (\mem groups ->
47 let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
50 else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
53 louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
54 louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
55 <$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
56 <$> (cLouvain $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
58 --------------------------------------
59 idx :: PhyloGroup -> Int
60 idx e = case elemIndex e nodes of
61 Nothing -> panic "[ERR][Gargantext.Viz.Phylo.Metrics.Clustering] a node is missing"
63 --------------------------------------
66 -----------------------
67 -- | Cluster Maker | --
68 -----------------------
71 -- | Optimisation to filter only relevant candidates
72 getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
73 getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
74 $ filter (\(g,g') -> g /= g')
75 $ listToDirectedCombi gs
78 -- | To transform a Graph into Clusters
79 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
80 graphToClusters clust (nodes,edges) = case clust of
81 Louvain (LouvainParams _) -> undefined
82 RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
83 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
86 -- | To transform a list of PhyloGroups into a Graph of Proximity
87 groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
88 groupsToGraph nbDocs prox gs = case prox of
89 WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
91 candidates' = candidates `using` parList rdeepseq
93 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
97 -- | To filter a Graph of Proximity using a given threshold
98 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
99 filterGraph prox (ns,es) = case prox of
100 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
101 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
105 -- | To clusterise a Phylo
106 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
107 phyloToClusters lvl clus p = Map.fromList
109 $ map (\g -> if null (fst g)
111 else graphToClusters clus g) graphs'
113 --------------------------------------
114 graphs' :: [([GroupNode],[GroupEdge])]
115 graphs' = traceGraphFiltered lvl
116 $ map (\g -> filterGraph prox g) graphs
117 --------------------------------------
118 graphs :: [([GroupNode],[GroupEdge])]
119 graphs = traceGraph lvl (getThreshold prox)
120 $ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
121 gs' = gs `using` parList rdeepseq
123 --------------------------------------
125 prox = getProximity clus
126 --------------------------------------
127 periods :: [PhyloPeriodId]
128 periods = getPhyloPeriods p
129 --------------------------------------
137 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
138 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
139 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
140 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
141 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
142 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
143 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
145 lst = sort $ map snd $ concat $ map snd g
148 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
149 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
150 <> "count : " <> show (length lst) <> " edges\n"
151 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
152 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
153 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
154 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
156 lst = sort $ map snd $ concat $ map snd g