2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
14 module Gargantext.Core.Viz.Phylo.BranchMaker
17 import Control.Parallel.Strategies
18 import Control.Lens hiding (both, Level)
19 import Data.List (concat,nub,(++),sortOn,reverse,sort,null,intersect,union,delete)
20 import Data.Map (Map,(!), fromListWith, elems)
21 import Data.Tuple (fst, snd)
22 import Gargantext.Prelude
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Core.Viz.Phylo.Cluster
25 import Gargantext.Core.Viz.Phylo.Aggregates
26 import Gargantext.Core.Viz.Phylo.Metrics
27 import Gargantext.Core.Viz.Phylo.Tools
28 import Gargantext.Core.Viz.Phylo.LinkMaker
30 import qualified Data.Map as Map
32 -- import Debug.Trace (trace)
34 ---------------------------
35 -- | Readability links | --
36 ---------------------------
38 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
39 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
41 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
42 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
45 getGroupsNgrams :: [PhyloGroup] -> [Int]
46 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
49 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
50 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
53 -- | Process a Jaccard on top of two set of Branch Peaks
54 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
55 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
56 / ((fromIntegral . length) $ union ns ns')) >= thr
59 -- | Get the framing period of a branch ([[PhyloGroup]])
60 getBranchPeriod :: [PhyloGroup] -> (Date,Date)
62 let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
63 in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
66 -- | Get the Nth most coocurent Ngrams in a list of Groups
67 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
68 getGroupsPeaks gs nth p = getNthMostOcc nth
69 $ getSubCooc (getGroupsNgrams gs)
70 $ getCooc (getGroupsPeriods gs) p
73 -- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
74 filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
75 filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
76 (getGroupsPeaks gs (getPhyloReBranchNth p) p)
77 (getGroupsPeaks gs' (getPhyloReBranchNth p) p))
78 && ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
79 && (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
83 -- | Try to connect a focused branch to other candidate branches by finding the best pointers
84 reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
85 reBranch p branch candidates =
86 let newLinks = map (\branch' ->
87 let pointers = map (\g ->
88 -- define pairs of candidates groups
89 let pairs = listToPairs
90 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
91 -- process the matching between the pairs and the current group
92 in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
94 then mem ++ [(getGroupId g,(getGroupId g2,s))]
95 else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
97 pointers' = pointers `using` parList rdeepseq
98 -- keep the best pointer between the focused branch and the current candidates
99 in head' "reBranch" $ reverse $ sortOn (snd . snd)
100 $ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
102 newLinks' = newLinks `using` parList rdeepseq
106 reLinkPhyloBranches :: Level -> Phylo -> Phylo
107 reLinkPhyloBranches lvl p =
108 let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
109 $ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
110 ([],branches) branches
111 in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
113 branches :: [[PhyloGroup]]
116 $ foldl' (\mem g -> case getGroupBranchId g of
118 Just i -> mem ++ [(i,[g])] )
119 [] $ getGroupsWithLevel lvl p
127 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
128 graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
129 graphToBranches groups = Map.fromList
131 $ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
134 $ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
137 -- | To set all the PhyloBranches for a given Level in a Phylo
138 setPhyloBranches :: Level -> Phylo -> Phylo
139 setPhyloBranches lvl p = alterGroupWithLevel (\g ->
140 let bIdx = branches ! (getGroupId g)
141 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
143 --------------------------------------
144 branches :: Map PhyloGroupId Int
145 branches = graphToBranches (getGroupsWithLevel lvl p)
146 --------------------------------------
149 -- trace' bs = trace bs