2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TemplateHaskell #-}
18 module Gargantext.Viz.Phylo.Cluster
20 import Control.Parallel.Strategies
21 import Data.Graph.Clustering.Louvain.CplusPlus
22 import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
23 import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
24 import Data.Map (Map, fromList, mapKeys)
25 import Data.Tuple (fst)
26 import Gargantext.Prelude
27 import Gargantext.Viz.Phylo
28 import Gargantext.Viz.Phylo.Tools
29 import Gargantext.Viz.Phylo.Metrics
30 import Gargantext.Viz.Phylo.LinkMaker
31 import qualified Data.Map as Map
33 import qualified Data.Vector.Storable as VS
34 import Debug.Trace (trace)
35 import Numeric.Statistics (percentile)
43 relatedComp :: Eq a => [[a]] -> [[a]]
44 relatedComp graphs = foldl' (\mem groups ->
48 let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
51 else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
54 louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
55 louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
56 <$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
57 <$> (cLouvain "0.0001" $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
59 --------------------------------------
60 idx :: PhyloGroup -> Int
61 idx e = case elemIndex e nodes of
62 Nothing -> panic "[ERR][Gargantext.Viz.Phylo.Metrics.Clustering] a node is missing"
64 --------------------------------------
67 -----------------------
68 -- | Cluster Maker | --
69 -----------------------
72 -- | Optimisation to filter only relevant candidates
73 getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
74 getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
75 $ filter (\(g,g') -> g /= g')
76 $ listToDirectedCombi gs
79 -- | To transform a Graph into Clusters
80 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
81 graphToClusters clust (nodes,edges) = case clust of
82 Louvain (LouvainParams _) -> undefined
83 RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
84 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
87 -- | To transform a list of PhyloGroups into a Graph of Proximity
88 groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
89 groupsToGraph nbDocs prox gs = case prox of
90 WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
92 candidates' = candidates `using` parList rdeepseq
94 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
98 -- | To filter a Graph of Proximity using a given threshold
99 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
100 filterGraph prox (ns,es) = case prox of
101 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
102 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
106 -- | To clusterise a Phylo
107 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
108 phyloToClusters lvl clus p = Map.fromList
110 $ map (\g -> if null (fst g)
112 else graphToClusters clus g) graphs'
114 --------------------------------------
115 graphs' :: [([GroupNode],[GroupEdge])]
116 graphs' = traceGraphFiltered lvl
117 $ map (\g -> filterGraph prox g) graphs
118 --------------------------------------
119 graphs :: [([GroupNode],[GroupEdge])]
120 graphs = traceGraph lvl (getThreshold prox)
121 $ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
122 gs' = gs `using` parList rdeepseq
124 --------------------------------------
126 prox = getProximity clus
127 --------------------------------------
128 periods :: [PhyloPeriodId]
129 periods = getPhyloPeriods p
130 --------------------------------------
138 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
139 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
140 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
141 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
142 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
143 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
144 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
146 lst = sort $ map snd $ concat $ map snd g
149 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
150 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
151 <> "count : " <> show (length lst) <> " edges\n"
152 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
153 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
154 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
155 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
157 lst = sort $ map snd $ concat $ map snd g