2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
14 module Gargantext.Core.Viz.Phylo.BranchMaker
17 import Control.Parallel.Strategies
18 import Control.Lens hiding (both, Level)
19 import Data.List (concat,nub,(++),sortOn,sort,null,intersect,union,delete)
20 import Data.Map (Map,(!), fromListWith, elems)
21 import Gargantext.Prelude
22 import Gargantext.Core.Viz.Phylo
23 import Gargantext.Core.Viz.Phylo.Cluster
24 import Gargantext.Core.Viz.Phylo.Aggregates
25 import Gargantext.Core.Viz.Phylo.Metrics
26 import Gargantext.Core.Viz.Phylo.Tools
27 import Gargantext.Core.Viz.Phylo.LinkMaker
29 import qualified Data.Map as Map
31 -- import Debug.Trace (trace)
33 ---------------------------
34 -- | Readability links | --
35 ---------------------------
37 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
38 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
40 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
41 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
44 getGroupsNgrams :: [PhyloGroup] -> [Int]
45 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
48 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
49 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
52 -- | Process a Jaccard on top of two set of Branch Peaks
53 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
54 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
55 / ((fromIntegral . length) $ union ns ns')) >= thr
58 -- | Get the framing period of a branch ([[PhyloGroup]])
59 getBranchPeriod :: [PhyloGroup] -> (Date,Date)
61 let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
62 in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
65 -- | Get the Nth most coocurent Ngrams in a list of Groups
66 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
67 getGroupsPeaks gs nth p = getNthMostOcc nth
68 $ getSubCooc (getGroupsNgrams gs)
69 $ getCooc (getGroupsPeriods gs) p
72 -- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
73 filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
74 filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
75 (getGroupsPeaks gs (getPhyloReBranchNth p) p)
76 (getGroupsPeaks gs' (getPhyloReBranchNth p) p))
77 && ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
78 && (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
82 -- | Try to connect a focused branch to other candidate branches by finding the best pointers
83 reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
84 reBranch p branch candidates =
85 let newLinks = map (\branch' ->
86 let pointers = map (\g ->
87 -- define pairs of candidates groups
88 let pairs = listToPairs
89 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
90 -- process the matching between the pairs and the current group
91 in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
93 then mem ++ [(getGroupId g,(getGroupId g2,s))]
94 else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
96 pointers' = pointers `using` parList rdeepseq
97 -- keep the best pointer between the focused branch and the current candidates
98 in head' "reBranch" $ reverse $ sortOn (snd . snd)
99 $ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
101 newLinks' = newLinks `using` parList rdeepseq
105 reLinkPhyloBranches :: Level -> Phylo -> Phylo
106 reLinkPhyloBranches lvl p =
107 let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
108 $ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
109 ([],branches) branches
110 in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
112 branches :: [[PhyloGroup]]
115 $ foldl' (\mem g -> case getGroupBranchId g of
117 Just i -> mem ++ [(i,[g])] )
118 [] $ getGroupsWithLevel lvl p
126 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
127 graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
128 graphToBranches groups = Map.fromList
130 $ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
133 $ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
136 -- | To set all the PhyloBranches for a given Level in a Phylo
137 setPhyloBranches :: Level -> Phylo -> Phylo
138 setPhyloBranches lvl p = alterGroupWithLevel (\g ->
139 let bIdx = branches ! (getGroupId g)
140 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
142 --------------------------------------
143 branches :: Map PhyloGroupId Int
144 branches = graphToBranches (getGroupsWithLevel lvl p)
145 --------------------------------------
148 -- trace' bs = trace bs