]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
Merge branch 'dev-phylo' into dev-merge
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Cluster.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TemplateHaskell #-}
17
18 module Gargantext.Viz.Phylo.Aggregates.Cluster
19 where
20
21 import Control.Parallel.Strategies
22 import Data.List (null,concat,sort,intersect,(++))
23 import Data.Map (Map)
24 import Data.Tuple (fst)
25 import Gargantext.Prelude
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Tools
28 import Gargantext.Viz.Phylo.Metrics.Proximity
29 import Gargantext.Viz.Phylo.Metrics.Clustering
30 import Gargantext.Viz.Phylo.LinkMaker
31 import qualified Data.Map as Map
32
33 import qualified Data.Vector.Storable as VS
34 import Debug.Trace (trace)
35 import Numeric.Statistics (percentile)
36
37
38 -- | Optimisation to filter only relevant candidates
39 getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
40 getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
41 $ filter (\(g,g') -> g /= g')
42 $ listToDirectedCombi gs
43
44
45 -- | To transform a Graph into Clusters
46 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
47 graphToClusters clust (nodes,edges) = case clust of
48 Louvain (LouvainParams _) -> undefined
49 RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
50 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
51
52
53 -- | To transform a list of PhyloGroups into a Graph of Proximity
54 groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
55 groupsToGraph nbDocs prox gs = case prox of
56 WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
57 $ getCandidates gs
58 candidates' = candidates `using` parList rdeepseq
59 in candidates' )
60 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
61 _ -> undefined
62
63
64 -- | To filter a Graph of Proximity using a given threshold
65 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
66 filterGraph prox (ns,es) = case prox of
67 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
68 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
69 _ -> undefined
70
71
72 -- | To clusterise a Phylo
73 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
74 phyloToClusters lvl clus p = Map.fromList
75 $ zip periods
76 $ map (\g -> if null (fst g)
77 then []
78 else graphToClusters clus g) graphs'
79 where
80 --------------------------------------
81 graphs' :: [([GroupNode],[GroupEdge])]
82 graphs' = traceGraphFiltered lvl
83 $ map (\g -> filterGraph prox g) graphs
84 --------------------------------------
85 graphs :: [([GroupNode],[GroupEdge])]
86 graphs = traceGraph lvl (getThreshold prox)
87 $ let gs = map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
88 gs' = gs `using` parList rdeepseq
89 in gs'
90 --------------------------------------
91 prox :: Proximity
92 prox = getProximity clus
93 --------------------------------------
94 periods :: [PhyloPeriodId]
95 periods = getPhyloPeriods p
96 --------------------------------------
97
98
99 ----------------
100 -- | Tracer | --
101 ----------------
102
103
104 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
105 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
106 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
107 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
108 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
109 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
110 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
111 where
112 lst = sort $ map snd $ concat $ map snd g
113
114
115 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
116 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
117 <> "count : " <> show (length lst) <> " edges\n"
118 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
119 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
120 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
121 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
122 where
123 lst = sort $ map snd $ concat $ map snd g