{-| Module : Gargantext.Viz.Phylo.Tools Description : Phylomemy Tools to build/manage it Copyright : (c) CNRS, 2017-Present License : AGPL + CECILL v3 Maintainer : team@gargantext.org Stability : experimental Portability : POSIX -} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} module Gargantext.Viz.Phylo.BranchMaker where import Control.Parallel.Strategies import Control.Lens hiding (both, Level) import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union,delete) import Data.Map (Map,(!), fromListWith, elems) import Data.Tuple (fst, snd) import Gargantext.Prelude import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo.Metrics.Clustering import Gargantext.Viz.Phylo.Aggregates.Cooc import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.LinkMaker import qualified Data.Map as Map -- import Debug.Trace (trace) --------------------------- -- | Readability links | -- --------------------------- getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)] getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs getFramedPeriod :: [PhyloGroup] -> (Date,Date) getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs)) getGroupsNgrams :: [PhyloGroup] -> [Int] getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr) -- | Process a Jaccard on top of two set of Branch Peaks areTwinPeaks :: Double -> [Int] -> [Int] -> Bool areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns') / ((fromIntegral . length) $ union ns ns')) >= thr -- | Get the framing period of a branch ([[PhyloGroup]]) getBranchPeriod :: [PhyloGroup] -> (Date,Date) getBranchPeriod gs = let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates) -- | Get the Nth most coocurent Ngrams in a list of Groups getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int] getGroupsPeaks gs nth p = getNthMostOcc nth $ getSubCooc (getGroupsNgrams gs) $ getCooc (getGroupsPeriods gs) p -- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]] filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p) (getGroupsPeaks gs (getPhyloReBranchNth p) p) (getGroupsPeaks gs' (getPhyloReBranchNth p) p)) && ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs)) && (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p)) ) branches -- | Try to connect a focused branch to other candidate branches by finding the best pointers reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)] reBranch p branch candidates = let newLinks = map (\branch' -> let pointers = map (\g -> -- define pairs of candidates groups let pairs = listToPairs $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch' -- process the matching between the pairs and the current group in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p in if (g2 == g3) then mem ++ [(getGroupId g,(getGroupId g2,s))] else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs ) branch pointers' = pointers `using` parList rdeepseq -- keep the best pointer between the focused branch and the current candidates in head' "reBranch" $ reverse $ sortOn (snd . snd) $ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers' ) candidates newLinks' = newLinks `using` parList rdeepseq in newLinks' reLinkPhyloBranches :: Level -> Phylo -> Phylo reLinkPhyloBranches lvl p = let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst $ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches')) ([],branches) branches in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p where branches :: [[PhyloGroup]] branches = elems $ fromListWith (++) $ foldl' (\mem g -> case getGroupBranchId g of Nothing -> mem Just i -> mem ++ [(i,[g])] ) [] $ getGroupsWithLevel lvl p ------------------ -- | Branches | -- ------------------ -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int graphToBranches groups = Map.fromList $ concat $ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds) $ zip [1..] $ relatedComp $ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups -- | To set all the PhyloBranches for a given Level in a Phylo setPhyloBranches :: Level -> Phylo -> Phylo setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = branches ! (getGroupId g) in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p where -------------------------------------- branches :: Map PhyloGroupId Int branches = graphToBranches (getGroupsWithLevel lvl p) -------------------------------------- -- trace' bs = trace bs