2 Module : Gargantext.Core.Viz.Phylo.SynchronicClustering
3 Description : Module dedicated to the adaptative synchronic clustering of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Viz.Phylo.SynchronicClustering where
14 -- import Debug.Trace (trace)
15 import Control.Lens hiding (Level)
16 import Control.Monad (sequence)
17 import Control.Parallel.Strategies (parList, rdeepseq, using)
18 import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
19 import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
20 import Gargantext.Core.Viz.Phylo
21 import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
22 import Gargantext.Core.Viz.Phylo.PhyloTools
23 import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
24 import Gargantext.Prelude
25 import qualified Data.Map as Map
28 -------------------------
29 -- | New Level Maker | --
30 -------------------------
32 mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
33 mergeGroups coocs id mapIds childs =
34 let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
35 in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs)
36 (snd $ fst id) (snd id) ""
37 (sum $ map _phylo_groupSupport childs)
39 $ map _phylo_groupWeight childs)
40 (concat $ map _phylo_groupSources childs)
42 (ngramsToCooc ngrams coocs)
44 (mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
45 (updatePointers $ concat $ map _phylo_groupPeriodParents childs)
46 (updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
47 (mergeAncestors $ concat $ map _phylo_groupAncestors childs)
48 (updatePointers' $ concat $ map _phylo_groupPeriodMemoryParents childs)
49 (updatePointers' $ concat $ map _phylo_groupPeriodMemoryChilds childs)
53 bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs
55 updatePointers :: [Pointer] -> [Pointer]
56 updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
57 updatePointers' :: [Pointer'] -> [Pointer']
58 updatePointers' pointers = map (\(pId,(t,w)) -> (mapIds ! pId,(t,w))) pointers
60 mergeAncestors :: [Pointer] -> [Pointer]
61 mergeAncestors pointers = Map.toList $ fromListWith max pointers
63 addPhyloLevel :: Level -> Phylo -> Phylo
64 addPhyloLevel lvl phylo =
65 over ( phylo_periods . traverse )
66 (\phyloPrd -> phyloPrd & phylo_periodLevels
67 %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
68 (PhyloLevel (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriod') lvl empty))) phylo
71 toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
72 toNextLevel' phylo groups =
73 let curLvl = getLastLevel phylo
74 oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
75 newGroups = concat $ groupsToBranches
76 $ fromList $ map (\g -> (getGroupId g, g))
77 $ foldlWithKey (\acc id groups' ->
78 -- 4) create the parent group
79 let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
80 in acc ++ [parent]) []
81 -- 3) group the current groups by parentId
82 $ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
84 newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
86 $ over ( phylo_periods . traverse . phylo_periodLevels . traverse
87 -- 6) update each period at curLvl + 1
88 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
89 -- 7) by adding the parents
91 if member (phyloLvl ^. phylo_levelPeriod) newPeriods
92 then phyloLvl & phylo_levelGroups
93 .~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
95 -- 2) add the curLvl + 1 phyloLevel to the phylo
96 $ addPhyloLevel (curLvl + 1)
97 -- 1) update the current groups (with level parent pointers) in the phylo
98 $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
104 toPairs :: SynchronyStrategy -> [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
105 toPairs strategy groups = case strategy of
106 MergeRegularGroups -> pairs
107 $ filter (\g -> all (== 3) $ (g ^. phylo_groupMeta) ! "dynamics") groups
108 MergeAllGroups -> pairs groups
110 pairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
111 pairs gs = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) (listToCombi' gs)
114 toDiamonds :: [PhyloGroup] -> [[PhyloGroup]]
115 toDiamonds groups = foldl' (\acc groups' ->
117 $ Map.filter (\v -> length v > 1)
120 acc' ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodChilds)) [] groups')) []
122 $ Map.filter (\v -> length v > 1)
124 $ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
127 groupsToEdges :: Proximity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
128 groupsToEdges prox sync nbDocs diago groups =
130 ByProximityThreshold thr sens _ strat ->
131 filter (\(_,w) -> w >= thr)
133 $ toPairs strat groups
134 ByProximityDistribution sens strat ->
135 let diamonds = sortOn snd
136 $ toEdges sens $ concat
137 $ map (\gs -> toPairs strat gs) $ toDiamonds groups
138 in take (div (length diamonds) 2) diamonds
140 toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
143 WeightedLogJaccard _ -> map (\(g,g') ->
144 ((g,g'), weightedLogJaccard' (sens) nbDocs diago
145 (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
146 WeightedLogSim _ -> map (\(g,g') ->
147 ((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago
148 (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
152 toParentId :: PhyloGroup -> PhyloGroupId
153 toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
156 reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
157 reduceGroups prox sync docs diagos branch =
158 -- 1) reduce a branch as a set of periods & groups
159 let periods = fromListWith (++)
160 $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
161 in (concat . concat . elems)
162 $ mapWithKey (\prd groups ->
163 -- 2) for each period, transform the groups as a proximity graph filtered by a threshold
164 let diago = reduceDiagos $ filterDiago diagos [prd]
165 edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
167 -- 4) add to each groups their futur level parent group
168 let parentId = toParentId (head' "parentId" comp)
169 in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
170 -- 3) reduce the graph a a set of related components
171 $ toRelatedComponents groups edges) periods
174 adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
175 adjustClustering sync branches = case sync of
176 ByProximityThreshold _ _ scope _ -> case scope of
177 SingleBranch -> branches
178 SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
179 == (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
180 $ sortOn _phylo_groupBranchId $ concat branches
181 AllBranches -> [concat branches]
182 ByProximityDistribution _ _ -> branches
185 levelUpAncestors :: [PhyloGroup] -> [PhyloGroup]
186 levelUpAncestors groups =
187 -- 1) create an associative map of (old,new) ids
188 let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupLevelParents))) groups
190 let id' = ids' ! (getGroupId g)
191 ancestors = g ^. phylo_groupAncestors
192 -- 2) level up the ancestors ids and filter the ones that will be merged
193 ancestors' = filter (\(id,_) -> id /= id') $ map (\(id,w) -> (ids' ! id,w)) ancestors
194 in g & phylo_groupAncestors .~ ancestors'
197 synchronicClustering :: Phylo -> Phylo
198 synchronicClustering phylo =
199 let prox = phyloProximity $ getConfig phylo
200 sync = phyloSynchrony $ getConfig phylo
201 docs = phylo ^. phylo_timeDocs
202 diagos = map coocToDiago $ phylo ^. phylo_timeCooc
203 newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
204 $ map processDynamics
205 $ adjustClustering sync
206 $ phyloToLastBranches
207 $ traceSynchronyStart phylo
208 newBranches' = newBranches `using` parList rdeepseq
209 in toNextLevel' phylo $ levelUpAncestors $ concat newBranches'
212 -- synchronicDistance :: Phylo -> Level -> String
213 -- synchronicDistance phylo lvl =
214 -- foldl' (\acc branch ->
215 -- acc <> (foldl' (\acc' period ->
216 -- acc' <> let prox = phyloProximity $ getConfig phylo
217 -- sync = phyloSynchrony $ getConfig phylo
218 -- docs = _phylo_timeDocs phylo
219 -- prd = _phylo_groupPeriod $ head' "distance" period
220 -- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
221 -- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
222 -- in foldl' (\mem (_,w) ->
228 -- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
229 -- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo