2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.BranchMaker
20 import Control.Parallel.Strategies
21 import Control.Lens hiding (both, Level)
22 import Data.List (concat,nub,(++),sortOn,reverse,sort,null,intersect,union,delete)
23 import Data.Map (Map,(!), fromListWith, elems)
24 import Data.Tuple (fst, snd)
25 import Gargantext.Prelude
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Cluster
28 import Gargantext.Viz.Phylo.Aggregates
29 import Gargantext.Viz.Phylo.Metrics
30 import Gargantext.Viz.Phylo.Tools
31 import Gargantext.Viz.Phylo.LinkMaker
33 import qualified Data.Map as Map
35 -- import Debug.Trace (trace)
37 ---------------------------
38 -- | Readability links | --
39 ---------------------------
41 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
42 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
44 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
45 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
48 getGroupsNgrams :: [PhyloGroup] -> [Int]
49 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
52 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
53 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
56 -- | Process a Jaccard on top of two set of Branch Peaks
57 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
58 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
59 / ((fromIntegral . length) $ union ns ns')) >= thr
62 -- | Get the framing period of a branch ([[PhyloGroup]])
63 getBranchPeriod :: [PhyloGroup] -> (Date,Date)
65 let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
66 in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
69 -- | Get the Nth most coocurent Ngrams in a list of Groups
70 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
71 getGroupsPeaks gs nth p = getNthMostOcc nth
72 $ getSubCooc (getGroupsNgrams gs)
73 $ getCooc (getGroupsPeriods gs) p
76 -- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
77 filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
78 filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
79 (getGroupsPeaks gs (getPhyloReBranchNth p) p)
80 (getGroupsPeaks gs' (getPhyloReBranchNth p) p))
81 && ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
82 && (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
86 -- | Try to connect a focused branch to other candidate branches by finding the best pointers
87 reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
88 reBranch p branch candidates =
89 let newLinks = map (\branch' ->
90 let pointers = map (\g ->
91 -- define pairs of candidates groups
92 let pairs = listToPairs
93 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
94 -- process the matching between the pairs and the current group
95 in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
97 then mem ++ [(getGroupId g,(getGroupId g2,s))]
98 else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
100 pointers' = pointers `using` parList rdeepseq
101 -- keep the best pointer between the focused branch and the current candidates
102 in head' "reBranch" $ reverse $ sortOn (snd . snd)
103 $ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
105 newLinks' = newLinks `using` parList rdeepseq
109 reLinkPhyloBranches :: Level -> Phylo -> Phylo
110 reLinkPhyloBranches lvl p =
111 let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
112 $ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
113 ([],branches) branches
114 in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
116 branches :: [[PhyloGroup]]
119 $ foldl' (\mem g -> case getGroupBranchId g of
121 Just i -> mem ++ [(i,[g])] )
122 [] $ getGroupsWithLevel lvl p
130 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
131 graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
132 graphToBranches groups = Map.fromList
134 $ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
137 $ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
140 -- | To set all the PhyloBranches for a given Level in a Phylo
141 setPhyloBranches :: Level -> Phylo -> Phylo
142 setPhyloBranches lvl p = alterGroupWithLevel (\g ->
143 let bIdx = branches ! (getGroupId g)
144 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
146 --------------------------------------
147 branches :: Map PhyloGroupId Int
148 branches = graphToBranches (getGroupsWithLevel lvl p)
149 --------------------------------------
152 -- trace' bs = trace bs