]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Cluster.hs
[docker] update image, add README info
[gargantext.git] / src / Gargantext / Viz / Phylo / Cluster.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TemplateHaskell #-}
17
18 module Gargantext.Viz.Phylo.Cluster
19 where
20 import Control.Parallel.Strategies
21 import Data.Graph.Clustering.Louvain.CplusPlus
22 import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
23 import Data.Map (Map, fromList, mapKeys)
24 import Data.Tuple (fst)
25 import Gargantext.Prelude
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Tools
28 import Gargantext.Viz.Phylo.Metrics
29 import Gargantext.Viz.Phylo.LinkMaker
30 import qualified Data.Map as Map
31
32 import qualified Data.Vector.Storable as VS
33 import Debug.Trace (trace)
34 import Numeric.Statistics (percentile)
35
36
37 --------------
38 -- | Algo | --
39 --------------
40
41
42 relatedComp :: Eq a => [[a]] -> [[a]]
43 relatedComp graphs = foldl' (\mem groups ->
44 if (null mem)
45 then mem ++ [groups]
46 else
47 let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
48 in if (null related)
49 then mem ++ [groups]
50 else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
51
52
53 louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
54 louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
55 <$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
56 <$> (cLouvain "0.0001" $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
57 where
58 --------------------------------------
59 idx :: PhyloGroup -> Int
60 idx e = case elemIndex e nodes of
61 Nothing -> panic "[ERR][Gargantext.Viz.Phylo.Metrics.Clustering] a node is missing"
62 Just i -> i
63 --------------------------------------
64
65
66 -----------------------
67 -- | Cluster Maker | --
68 -----------------------
69
70
71 -- | Optimisation to filter only relevant candidates
72 getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
73 getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
74 $ filter (\(g,g') -> g /= g')
75 $ listToDirectedCombi gs
76
77
78 -- | To transform a Graph into Clusters
79 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
80 graphToClusters clust (nodes,edges) = case clust of
81 Louvain (LouvainParams _) -> undefined
82 RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
83 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
84
85
86 -- | To transform a list of PhyloGroups into a Graph of Proximity
87 groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
88 groupsToGraph nbDocs prox gs = case prox of
89 WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
90 $ getCandidates gs
91 candidates' = candidates `using` parList rdeepseq
92 in candidates' )
93 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
94 _ -> undefined
95
96
97 -- | To filter a Graph of Proximity using a given threshold
98 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
99 filterGraph prox (ns,es) = case prox of
100 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
101 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
102 _ -> undefined
103
104
105 -- | To clusterise a Phylo
106 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
107 phyloToClusters lvl clus p = Map.fromList
108 $ zip periods
109 $ map (\g -> if null (fst g)
110 then []
111 else graphToClusters clus g) graphs'
112 where
113 --------------------------------------
114 graphs' :: [([GroupNode],[GroupEdge])]
115 graphs' = traceGraphFiltered lvl
116 $ map (\g -> filterGraph prox g) graphs
117 --------------------------------------
118 graphs :: [([GroupNode],[GroupEdge])]
119 graphs = traceGraph lvl (getThreshold prox)
120 $ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
121 gs' = gs `using` parList rdeepseq
122 in gs'
123 --------------------------------------
124 prox :: Proximity
125 prox = getProximity clus
126 --------------------------------------
127 periods :: [PhyloPeriodId]
128 periods = getPhyloPeriods p
129 --------------------------------------
130
131
132 ----------------
133 -- | Tracer | --
134 ----------------
135
136
137 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
138 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
139 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
140 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
141 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
142 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
143 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
144 where
145 lst = sort $ map snd $ concat $ map snd g
146
147
148 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
149 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
150 <> "count : " <> show (length lst) <> " edges\n"
151 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
152 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
153 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
154 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
155 where
156 lst = sort $ map snd $ concat $ map snd g