]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/Cluster.hs
[DB SCHEMA] adding hash_id column to nodes
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / Cluster.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE TemplateHaskell #-}
14
15 module Gargantext.Core.Viz.Phylo.Cluster
16 where
17 import Control.Parallel.Strategies
18 import Data.Graph.Clustering.Louvain.CplusPlus
19 import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
20 import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
21 import Data.Map (Map, fromList, mapKeys)
22 import Data.Tuple (fst)
23 import Gargantext.Prelude
24 import Gargantext.Core.Viz.Phylo
25 import Gargantext.Core.Viz.Phylo.Tools
26 import Gargantext.Core.Viz.Phylo.Metrics
27 import Gargantext.Core.Viz.Phylo.LinkMaker
28 import qualified Data.Map as Map
29
30 import qualified Data.Vector.Storable as VS
31 import Debug.Trace (trace)
32 import Numeric.Statistics (percentile)
33
34
35 --------------
36 -- | Algo | --
37 --------------
38
39
40 relatedComp :: Eq a => [[a]] -> [[a]]
41 relatedComp graphs = foldl' (\mem groups ->
42 if (null mem)
43 then mem ++ [groups]
44 else
45 let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
46 in if (null related)
47 then mem ++ [groups]
48 else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
49
50
51 louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
52 louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
53 <$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
54 <$> (cLouvain "0.0001" $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
55 where
56 --------------------------------------
57 idx :: PhyloGroup -> Int
58 idx e = case elemIndex e nodes of
59 Nothing -> panic "[ERR][Gargantext.Core.Viz.Phylo.Metrics.Clustering] a node is missing"
60 Just i -> i
61 --------------------------------------
62
63
64 -----------------------
65 -- | Cluster Maker | --
66 -----------------------
67
68
69 -- | Optimisation to filter only relevant candidates
70 getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
71 getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
72 $ filter (\(g,g') -> g /= g')
73 $ listToDirectedCombi gs
74
75
76 -- | To transform a Graph into Clusters
77 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
78 graphToClusters clust (nodes,edges) = case clust of
79 Louvain (LouvainParams _) -> undefined
80 RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
81 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
82
83
84 -- | To transform a list of PhyloGroups into a Graph of Proximity
85 groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
86 groupsToGraph nbDocs prox gs = case prox of
87 WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
88 $ getCandidates gs
89 candidates' = candidates `using` parList rdeepseq
90 in candidates' )
91 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
92 _ -> undefined
93
94
95 -- | To filter a Graph of Proximity using a given threshold
96 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
97 filterGraph prox (ns,es) = case prox of
98 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
99 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
100 _ -> undefined
101
102
103 -- | To clusterise a Phylo
104 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
105 phyloToClusters lvl clus p = Map.fromList
106 $ zip periods
107 $ map (\g -> if null (fst g)
108 then []
109 else graphToClusters clus g) graphs'
110 where
111 --------------------------------------
112 graphs' :: [([GroupNode],[GroupEdge])]
113 graphs' = traceGraphFiltered lvl
114 $ map (\g -> filterGraph prox g) graphs
115 --------------------------------------
116 graphs :: [([GroupNode],[GroupEdge])]
117 graphs = traceGraph lvl (getThreshold prox)
118 $ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
119 gs' = gs `using` parList rdeepseq
120 in gs'
121 --------------------------------------
122 prox :: Proximity
123 prox = getProximity clus
124 --------------------------------------
125 periods :: [PhyloPeriodId]
126 periods = getPhyloPeriods p
127 --------------------------------------
128
129
130 ----------------
131 -- | Tracer | --
132 ----------------
133
134
135 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
136 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
137 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
138 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
139 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
140 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
141 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
142 where
143 lst = sort $ map snd $ concat $ map snd g
144
145
146 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
147 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
148 <> "count : " <> show (length lst) <> " edges\n"
149 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
150 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
151 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
152 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
153 where
154 lst = sort $ map snd $ concat $ map snd g