module Gargantext.Core.Viz.Phylo.SynchronicClustering where
-import Gargantext.Prelude
-import Gargantext.Core.Viz.AdaptativePhylo
-import Gargantext.Core.Viz.Phylo.PhyloTools
-import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
-import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
-
-import Data.List ((++), null, intersect, nub, concat, sort, sortOn, all, groupBy)
-import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
-
+-- import Debug.Trace (trace)
import Control.Lens hiding (Level)
+import Control.Monad (sequence)
import Control.Parallel.Strategies (parList, rdeepseq, using)
--- import Debug.Trace (trace)
-
-import qualified Data.Map as Map
+import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
+import Data.Map.Strict (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
+import Gargantext.Core.Viz.Phylo
+import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
+import Gargantext.Core.Viz.Phylo.PhyloTools
+import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
+import Gargantext.Prelude
+import qualified Data.Map.Strict as Map
-------------------------
-- | New Level Maker | --
-------------------------
-
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
-mergeGroups coocs id mapIds childs =
+mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
- in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
- (sum $ map _phylo_groupSupport childs) ngrams
- (ngramsToCooc ngrams coocs)
+ in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs)
+ (snd $ fst id) (snd id) ""
+ (sum $ map _phylo_groupSupport childs)
+ (fmap sum $ sequence
+ $ map _phylo_groupWeight childs)
+ (concat $ map _phylo_groupSources childs)
+ ngrams
+ (ngramsToCooc ngrams coocs)
((snd $ fst id),bId)
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
- []
+ (mergeAncestors $ concat $ map _phylo_groupAncestors childs)
+ (updatePointers' $ concat $ map _phylo_groupPeriodMemoryParents childs)
+ (updatePointers' $ concat $ map _phylo_groupPeriodMemoryChilds childs)
where
--------------------
bId :: [Int]
--------------------
updatePointers :: [Pointer] -> [Pointer]
updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
+ updatePointers' :: [Pointer'] -> [Pointer']
+ updatePointers' pointers = map (\(pId,(t,w)) -> (mapIds ! pId,(t,w))) pointers
+ --------------------
+ mergeAncestors :: [Pointer] -> [Pointer]
+ mergeAncestors pointers = Map.toList $ fromListWith max pointers
-
-addPhyloLevel :: Level -> Phylo -> Phylo
-addPhyloLevel lvl phylo =
- over ( phylo_periods . traverse )
- (\phyloPrd -> phyloPrd & phylo_periodLevels
- %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo
+addPhyloScale :: Scale -> Phylo -> Phylo
+addPhyloScale lvl phylo =
+ over ( phylo_periods . traverse )
+ (\phyloPrd -> phyloPrd & phylo_periodScales
+ %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
+ (PhyloScale (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriodStr) lvl empty))) phylo
-toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
-toNextLevel' phylo groups =
+toNextScale :: Phylo -> [PhyloGroup] -> Phylo
+toNextScale phylo groups =
let curLvl = getLastLevel phylo
oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
- newGroups = concat $ groupsToBranches
+ newGroups = concat $ groupsToBranches'
$ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' ->
-- 4) create the parent group
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
- in traceSynchronyEnd
- $ over ( phylo_periods . traverse . phylo_periodLevels . traverse
+ in traceSynchronyEnd
+ $ over ( phylo_periods . traverse . phylo_periodScales . traverse
-- 6) update each period at curLvl + 1
- . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
+ . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == (curLvl + 1)))
-- 7) by adding the parents
- (\phyloLvl ->
- if member (phyloLvl ^. phylo_levelPeriod) newPeriods
- then phyloLvl & phylo_levelGroups
- .~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
+ (\phyloLvl ->
+ if member (phyloLvl ^. phylo_scalePeriod) newPeriods
+ then phyloLvl & phylo_scaleGroups
+ .~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_scalePeriod))
else phyloLvl)
- -- 2) add the curLvl + 1 phyloLevel to the phylo
- $ addPhyloLevel (curLvl + 1)
+ -- 2) add the curLvl + 1 PhyloScale to the phylo
+ $ addPhyloScale (curLvl + 1)
-- 1) update the current groups (with level parent pointers) in the phylo
- $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
+ $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
--------------------
-- | Clustering | --
--------------------
toPairs :: SynchronyStrategy -> [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
-toPairs strategy groups = case strategy of
+toPairs strategy groups = case strategy of
MergeRegularGroups -> pairs
$ filter (\g -> all (== 3) $ (g ^. phylo_groupMeta) ! "dynamics") groups
MergeAllGroups -> pairs groups
- where
+ where
pairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
pairs gs = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) (listToCombi' gs)
acc ++ ( elems
$ Map.filter (\v -> length v > 1)
$ fromListWith (++)
- $ foldl' (\acc' g ->
+ $ foldl' (\acc' g ->
acc' ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodChilds)) [] groups')) []
$ elems
$ Map.filter (\v -> length v > 1)
ByProximityThreshold thr sens _ strat ->
filter (\(_,w) -> w >= thr)
$ toEdges sens
- $ toPairs strat groups
- ByProximityDistribution sens strat ->
- let diamonds = sortOn snd
+ $ toPairs strat groups
+ ByProximityDistribution sens strat ->
+ let diamonds = sortOn snd
$ toEdges sens $ concat
- $ map (\gs -> toPairs strat gs) $ toDiamonds groups
+ $ map (\gs -> toPairs strat gs) $ toDiamonds groups
in take (div (length diamonds) 2) diamonds
- where
+ where
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
- toEdges sens edges =
+ toEdges sens edges =
case prox of
- WeightedLogJaccard _ -> map (\(g,g') ->
- ((g,g'), weightedLogJaccard' sens nbDocs diago
+ WeightedLogJaccard _ _ -> map (\(g,g') ->
+ ((g,g'), weightedLogJaccard' (sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
- _ -> undefined
+ WeightedLogSim _ _ -> map (\(g,g') ->
+ ((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago
+ (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
+
+ _ -> undefined
toParentId :: PhyloGroup -> PhyloGroupId
-toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
+toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
- $ mapWithKey (\prd groups ->
+ -- TODO : ajouter un parallelisme
+ $ mapWithKey (\prd groups ->
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
- in map (\comp ->
+ in map (\comp ->
-- 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp)
- in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
+ in map (\g -> g & phylo_groupScaleParents %~ (++ [(parentId,1)]) ) comp )
-- 3) reduce the graph a a set of related components
- $ toRelatedComponents groups edges) periods
+ $ toRelatedComponents groups edges) periods
-adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
-adjustClustering sync branches = case sync of
- ByProximityThreshold _ _ scope _ -> case scope of
+chooseClusteringStrategy :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
+chooseClusteringStrategy sync branches = case sync of
+ ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches
- SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
- == (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
+ SiblingBranches -> groupBy (\g g' -> (last' "chooseClusteringStrategy" $ (g ^. phylo_groupMeta) ! "breaks")
+ == (last' "chooseClusteringStrategy" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches
AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches
+levelUpAncestors :: [PhyloGroup] -> [PhyloGroup]
+levelUpAncestors groups =
+ -- 1) create an associative map of (old,new) ids
+ let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupScaleParents))) groups
+ in map (\g ->
+ let id' = ids' ! (getGroupId g)
+ ancestors = g ^. phylo_groupAncestors
+ -- 2) level up the ancestors ids and filter the ones that will be merged
+ ancestors' = filter (\(id,_) -> id /= id') $ map (\(id,w) -> (ids' ! id,w)) ancestors
+ in g & phylo_groupAncestors .~ ancestors'
+ ) groups
synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo =
sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs
diagos = map coocToDiago $ phylo ^. phylo_timeCooc
- newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
+ newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics
- $ adjustClustering sync
- $ phyloToLastBranches
+ $ chooseClusteringStrategy sync
+ $ phyloLastScale
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq
- in toNextLevel' phylo $ concat newBranches'
+ in toNextScale phylo $ levelUpAncestors $ concat newBranches'
--- synchronicDistance :: Phylo -> Level -> String
--- synchronicDistance phylo lvl =
--- foldl' (\acc branch ->
+-- synchronicSimilarity :: Phylo -> Level -> String
+-- synchronicSimilarity phylo lvl =
+-- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo
-- sync = phyloSynchrony $ getConfig phylo
-- docs = _phylo_timeDocs phylo
-- prd = _phylo_groupPeriod $ head' "distance" period
--- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
+-- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
-- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
--- in foldl' (\mem (_,w) ->
+-- in foldl' (\mem (_,w) ->
-- mem <> show (prd)
-- <> "\t"
-- <> show (w)
-- <> "\n"
--- ) "" edges
+-- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo