]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Cluster.hs
Merge branch 'dev-phylo' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell-gargantex...
[gargantext.git] / src / Gargantext / Viz / Phylo / Cluster.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TemplateHaskell #-}
17
18 module Gargantext.Viz.Phylo.Cluster
19 where
20
21 import Control.Parallel.Strategies
22 import Data.Graph.Clustering.Louvain.CplusPlus
23 import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
24 import Data.Map (Map, fromList, mapKeys)
25 import Data.Tuple (fst)
26 import Gargantext.Prelude
27 import Gargantext.Viz.Phylo
28 import Gargantext.Viz.Phylo.Tools
29 import Gargantext.Viz.Phylo.Metrics
30 import Gargantext.Viz.Phylo.LinkMaker
31 import qualified Data.Map as Map
32
33 import qualified Data.Vector.Storable as VS
34 import Debug.Trace (trace)
35 import Numeric.Statistics (percentile)
36
37
38 --------------
39 -- | Algo | --
40 --------------
41
42
43 relatedComp :: Eq a => [[a]] -> [[a]]
44 relatedComp graphs = foldl' (\mem groups ->
45 if (null mem)
46 then mem ++ [groups]
47 else
48 let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
49 in if (null related)
50 then mem ++ [groups]
51 else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
52
53
54 louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
55 louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
56 <$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
57 <$> (cLouvain $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
58 where
59 --------------------------------------
60 idx :: PhyloGroup -> Int
61 idx e = case elemIndex e nodes of
62 Nothing -> panic "[ERR][Gargantext.Viz.Phylo.Metrics.Clustering] a node is missing"
63 Just i -> i
64 --------------------------------------
65
66
67 -----------------------
68 -- | Cluster Maker | --
69 -----------------------
70
71
72 -- | Optimisation to filter only relevant candidates
73 getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
74 getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
75 $ filter (\(g,g') -> g /= g')
76 $ listToDirectedCombi gs
77
78
79 -- | To transform a Graph into Clusters
80 graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
81 graphToClusters clust (nodes,edges) = case clust of
82 Louvain (LouvainParams _) -> undefined
83 RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
84 _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
85
86
87 -- | To transform a list of PhyloGroups into a Graph of Proximity
88 groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
89 groupsToGraph nbDocs prox gs = case prox of
90 WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
91 $ getCandidates gs
92 candidates' = candidates `using` parList rdeepseq
93 in candidates' )
94 Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
95 _ -> undefined
96
97
98 -- | To filter a Graph of Proximity using a given threshold
99 filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
100 filterGraph prox (ns,es) = case prox of
101 WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
102 Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
103 _ -> undefined
104
105
106 -- | To clusterise a Phylo
107 phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
108 phyloToClusters lvl clus p = Map.fromList
109 $ zip periods
110 $ map (\g -> if null (fst g)
111 then []
112 else graphToClusters clus g) graphs'
113 where
114 --------------------------------------
115 graphs' :: [([GroupNode],[GroupEdge])]
116 graphs' = traceGraphFiltered lvl
117 $ map (\g -> filterGraph prox g) graphs
118 --------------------------------------
119 graphs :: [([GroupNode],[GroupEdge])]
120 graphs = traceGraph lvl (getThreshold prox)
121 $ let gs = map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
122 gs' = gs `using` parList rdeepseq
123 in gs'
124 --------------------------------------
125 prox :: Proximity
126 prox = getProximity clus
127 --------------------------------------
128 periods :: [PhyloPeriodId]
129 periods = getPhyloPeriods p
130 --------------------------------------
131
132
133 ----------------
134 -- | Tracer | --
135 ----------------
136
137
138 traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
139 traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
140 <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
141 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
142 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
143 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
144 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
145 where
146 lst = sort $ map snd $ concat $ map snd g
147
148
149 traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
150 traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
151 <> "count : " <> show (length lst) <> " edges\n"
152 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
153 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
154 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
155 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
156 where
157 lst = sort $ map snd $ concat $ map snd g