2 Module : Gargantext.Viz.Phylo.SynchronicClustering
3 Description : Module dedicated to the adaptative synchronic clustering of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
16 module Gargantext.Viz.Phylo.SynchronicClustering where
18 import Gargantext.Prelude
19 import Gargantext.Viz.AdaptativePhylo
20 import Gargantext.Viz.Phylo.PhyloTools
21 import Gargantext.Viz.Phylo.TemporalMatching (weightedLogJaccard)
23 import Data.List ((++), null, intersect, nub, concat, sort, sortOn)
24 import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
26 import Control.Lens hiding (Level)
27 import Control.Parallel.Strategies (parList, rdeepseq, using)
29 import qualified Data.Map as Map
32 -------------------------
33 -- | New Level Maker | --
34 -------------------------
36 toBranchId :: PhyloGroup -> PhyloBranchId
37 toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
39 mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
40 mergeGroups coocs id mapIds childs =
41 let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
42 in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
43 (sum $ map _phylo_groupSupport childs) ngrams
44 (ngramsToCooc ngrams coocs) (toBranchId (head' "mergeGroups" childs))
45 empty [] (map (\g -> (getGroupId g, 1)) childs)
46 (updatePointers $ concat $ map _phylo_groupPeriodParents childs)
47 (updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
49 updatePointers :: [Pointer] -> [Pointer]
50 updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
53 addPhyloLevel :: Level -> Phylo -> Phylo
54 addPhyloLevel lvl phylo =
55 over ( phylo_periods . traverse )
56 (\phyloPrd -> phyloPrd & phylo_periodLevels
57 %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo
60 toNextLevel :: Phylo -> [PhyloGroup] -> Phylo
61 toNextLevel phylo groups =
62 let curLvl = getLastLevel phylo
63 oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
64 newGroups = fromListWith (++)
65 -- | 5) group the parents by periods
66 $ foldlWithKey (\acc id groups' ->
67 -- | 4) create the parent group
68 let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
69 in acc ++ [(parent ^. phylo_groupPeriod, [parent])]) []
70 -- | 3) group the current groups by parentId
71 $ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
73 $ over ( phylo_periods . traverse . phylo_periodLevels . traverse
74 -- | 6) update each period at curLvl + 1
75 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
76 -- | 7) by adding the parents
78 if member (phyloLvl ^. phylo_levelPeriod) newGroups
79 then phyloLvl & phylo_levelGroups
80 .~ fromList (map (\g -> (getGroupId g, g)) $ newGroups ! (phyloLvl ^. phylo_levelPeriod))
82 -- | 2) add the curLvl + 1 phyloLevel to the phylo
83 $ addPhyloLevel (curLvl + 1)
84 -- | 1) update the current groups (with level parent pointers) in the phylo
85 $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
93 toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
94 toPairs groups = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))
98 toDiamonds :: [PhyloGroup] -> [[PhyloGroup]]
99 toDiamonds groups = foldl' (\acc groups' ->
101 $ Map.filter (\v -> length v > 1)
104 acc' ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodChilds)) [] groups')) []
106 $ Map.filter (\v -> length v > 1)
108 $ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
111 groupsToEdges :: Proximity -> Synchrony -> Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
112 groupsToEdges prox sync docs groups =
114 ByProximityThreshold t s -> filter (\(_,w) -> w >= t)
118 ByProximityDistribution s ->
119 let diamonds = sortOn snd
121 $ map toPairs $ toDiamonds groups
122 in take (div (length diamonds) 2) diamonds
124 toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
127 WeightedLogJaccard _ _ _ -> map (\(g,g') ->
128 ((g,g'), weightedLogJaccard sens docs
129 (g ^. phylo_groupCooc) (g' ^. phylo_groupCooc)
130 (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
135 toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
136 toRelatedComponents nodes edges =
137 let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
138 clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
139 in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
141 toParentId :: PhyloGroup -> PhyloGroupId
142 toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
145 reduceBranch :: Proximity -> Synchrony -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
146 reduceBranch prox sync docs branch =
147 -- | 1) reduce a branch as a set of periods & groups
148 let periods = fromListWith (++)
149 $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
150 in (concat . concat . elems)
151 $ mapWithKey (\prd groups ->
152 -- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
153 let edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) groups
155 -- | 4) add to each groups their futur level parent group
156 let parentId = toParentId (head' "parentId" comp)
157 in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
158 -- |3) reduce the graph a a set of related components
159 $ toRelatedComponents groups edges) periods
162 synchronicClustering :: Phylo -> Phylo
163 synchronicClustering phylo =
164 let prox = phyloProximity $ getConfig phylo
165 sync = phyloSynchrony $ getConfig phylo
166 docs = phylo ^. phylo_timeDocs
167 branches = map (\branch -> reduceBranch prox sync docs branch)
168 $ phyloToLastBranches
169 $ traceSynchronyStart phylo
170 branches' = branches `using` parList rdeepseq
171 in toNextLevel phylo $ concat branches'
178 -- synchronicDistance :: Phylo -> Level -> String
179 -- synchronicDistance phylo lvl =
180 -- foldl' (\acc branch ->
181 -- acc <> (foldl' (\acc' period ->
182 -- acc' <> let prox = phyloProximity $ getConfig phylo
183 -- sync = phyloSynchrony $ getConfig phylo
184 -- docs = _phylo_timeDocs phylo
185 -- prd = _phylo_groupPeriod $ head' "distance" period
186 -- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
187 -- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
188 -- in foldl' (\mem (_,w) ->
194 -- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
195 -- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo