]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/Cluster.hs
[phylo] working on file naming function. Now OK2 but branch length is still missing
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / Cluster.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE TemplateHaskell #-}
14
15 module Gargantext.Core.Viz.Phylo.Cluster
16 where
17 import Control.Parallel.Strategies
18 import Data.Graph.Clustering.Louvain.CplusPlus
19 import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
20 import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
21 import Data.Map (Map, fromList, mapKeys)
22 import Gargantext.Prelude
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Core.Viz.Phylo.Tools
25 import Gargantext.Core.Viz.Phylo.Metrics
26 import Gargantext.Core.Viz.Phylo.LinkMaker
27 import qualified Data.Map as Map
28
29 import qualified Data.Vector.Storable as VS
30 import Debug.Trace (trace)
31 import Numeric.Statistics (percentile)
32
33
34 --------------
35 -- | Algo | --
36 --------------
37
38
39 relatedComp :: Eq a => [[a]] -> [[a]]
40 relatedComp graphs = foldl' (\mem groups ->
41 if (null mem)
42 then mem ++ [groups]
43 else
44 let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
45 in if (null related)
46 then mem ++ [groups]
47 else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
48
49
50 louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
51 louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
52 <$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
53 <$> (cLouvain "0.0001" $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
54 where
55 --------------------------------------
56 idx :: PhyloGroup -> Int
57 idx e = case elemIndex e nodes of
58 Nothing -> panic "[ERR][Gargantext.Core.Viz.Phylo.Metrics.Clustering] a node is missing"
59 Just i -> i
60 --------------------------------------
61
62
63 -----------------------
64 -- | Cluster Maker | --
65 -----------------------
66
67
68 -- | Optimisation to filter only relevant candidates
69 getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
70 getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
71 $ filter (\(g,g') -> g /= g')
72 $ listToDirectedCombi gs
73
74
75 -- | To transform a Graph into Clusters
76 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
77 graphToClusters clust (nodes,edges) = case clust of
78 Louvain (LouvainParams _) -> undefined
79 RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
80 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
81
82
83 -- | To transform a list of PhyloGroups into a Graph of Proximity
84 groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
85 groupsToGraph nbDocs prox gs = case prox of
86 WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
87 $ getCandidates gs
88 candidates' = candidates `using` parList rdeepseq
89 in candidates' )
90 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
91 _ -> undefined
92
93
94 -- | To filter a Graph of Proximity using a given threshold
95 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
96 filterGraph prox (ns,es) = case prox of
97 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
98 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
99 _ -> undefined
100
101
102 -- | To clusterise a Phylo
103 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
104 phyloToClusters lvl clus p = Map.fromList
105 $ zip periods
106 $ map (\g -> if null (fst g)
107 then []
108 else graphToClusters clus g) graphs'
109 where
110 --------------------------------------
111 graphs' :: [([GroupNode],[GroupEdge])]
112 graphs' = traceGraphFiltered lvl
113 $ map (\g -> filterGraph prox g) graphs
114 --------------------------------------
115 graphs :: [([GroupNode],[GroupEdge])]
116 graphs = traceGraph lvl (getThreshold prox)
117 $ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
118 gs' = gs `using` parList rdeepseq
119 in gs'
120 --------------------------------------
121 prox :: Proximity
122 prox = getProximity clus
123 --------------------------------------
124 periods :: [PhyloPeriodId]
125 periods = getPhyloPeriods p
126 --------------------------------------
127
128
129 ----------------
130 -- | Tracer | --
131 ----------------
132
133
134 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
135 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
136 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
137 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
138 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
139 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
140 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
141 where
142 lst = sort $ map snd $ concat $ map snd g
143
144
145 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
146 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
147 <> "count : " <> show (length lst) <> " edges\n"
148 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
149 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
150 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
151 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
152 where
153 lst = sort $ map snd $ concat $ map snd g