import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
+import Control.Monad (sequence)
-- import Debug.Trace (trace)
import qualified Data.Map as Map
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
- in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
- (sum $ map _phylo_groupSupport childs) ngrams
+ in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs)
+ (snd $ fst id) (snd id) ""
+ (sum $ map _phylo_groupSupport childs)
+ (fmap sum $ sequence
+ $ map _phylo_groupWeight childs)
+ (concat $ map _phylo_groupSources childs)
+ ngrams
(ngramsToCooc ngrams coocs)
((snd $ fst id),bId)
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
- []
+ (mergeAncestors $ concat $ map _phylo_groupAncestors childs)
where
--------------------
bId :: [Int]
--------------------
updatePointers :: [Pointer] -> [Pointer]
updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
-
+ --------------------
+ mergeAncestors :: [Pointer] -> [Pointer]
+ mergeAncestors pointers = Map.toList $ fromListWith max pointers
addPhyloLevel :: Level -> Phylo -> Phylo
addPhyloLevel lvl phylo =
over ( phylo_periods . traverse )
(\phyloPrd -> phyloPrd & phylo_periodLevels
- %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo
+ %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
+ (PhyloLevel (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriod') lvl empty))) phylo
toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
toEdges sens edges =
case prox of
WeightedLogJaccard _ -> map (\(g,g') ->
- ((g,g'), weightedLogJaccard' sens nbDocs diago
+ ((g,g'), weightedLogJaccard' (sens) nbDocs diago
+ (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
+ WeightedLogSim _ -> map (\(g,g') ->
+ ((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
+
_ -> undefined
toParentId :: PhyloGroup -> PhyloGroupId
ByProximityDistribution _ _ -> branches
+levelUpAncestors :: [PhyloGroup] -> [PhyloGroup]
+levelUpAncestors groups =
+ -- 1) create an associative map of (old,new) ids
+ let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupLevelParents))) groups
+ in map (\g ->
+ let id' = ids' ! (getGroupId g)
+ ancestors = g ^. phylo_groupAncestors
+ -- 2) level up the ancestors ids and filter the ones that will be merged
+ ancestors' = filter (\(id,_) -> id /= id') $ map (\(id,w) -> (ids' ! id,w)) ancestors
+ in g & phylo_groupAncestors .~ ancestors'
+ ) groups
synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo =
$ phyloToLastBranches
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq
- in toNextLevel' phylo $ concat newBranches'
+ in toNextLevel' phylo $ levelUpAncestors $ concat newBranches'
-- synchronicDistance :: Phylo -> Level -> String