2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.BranchMaker
20 import Control.Parallel.Strategies
21 import Control.Lens hiding (both, Level)
22 import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union,delete)
23 import Data.Map (Map,(!), fromListWith, elems)
24 import Data.Tuple (fst, snd)
25 import Gargantext.Prelude
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Metrics.Clustering
28 import Gargantext.Viz.Phylo.Aggregates.Cooc
29 import Gargantext.Viz.Phylo.Tools
30 import Gargantext.Viz.Phylo.LinkMaker
32 import qualified Data.Map as Map
34 -- import Debug.Trace (trace)
36 ---------------------------
37 -- | Readability links | --
38 ---------------------------
40 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
41 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
43 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
44 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
47 getGroupsNgrams :: [PhyloGroup] -> [Int]
48 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
51 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
52 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
55 -- | Process a Jaccard on top of two set of Branch Peaks
56 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
57 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
58 / ((fromIntegral . length) $ union ns ns')) >= thr
61 -- | Get the framing period of a branch ([[PhyloGroup]])
62 getBranchPeriod :: [PhyloGroup] -> (Date,Date)
64 let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
65 in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
68 -- | Get the Nth most coocurent Ngrams in a list of Groups
69 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
70 getGroupsPeaks gs nth p = getNthMostOcc nth
71 $ getSubCooc (getGroupsNgrams gs)
72 $ getCooc (getGroupsPeriods gs) p
75 -- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
76 filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
77 filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
78 (getGroupsPeaks gs (getPhyloReBranchNth p) p)
79 (getGroupsPeaks gs' (getPhyloReBranchNth p) p))
80 && ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
81 && (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
85 -- | Try to connect a focused branch to other candidate branches by finding the best pointers
86 reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
87 reBranch p branch candidates =
88 let newLinks = map (\branch' ->
89 let pointers = map (\g ->
90 -- define pairs of candidates groups
91 let pairs = listToPairs
92 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
93 -- process the matching between the pairs and the current group
94 in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
96 then mem ++ [(getGroupId g,(getGroupId g2,s))]
97 else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
99 pointers' = pointers `using` parList rdeepseq
100 -- keep the best pointer between the focused branch and the current candidates
101 in head' "reBranch" $ reverse $ sortOn (snd . snd)
102 $ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
104 newLinks' = newLinks `using` parList rdeepseq
108 reLinkPhyloBranches :: Level -> Phylo -> Phylo
109 reLinkPhyloBranches lvl p =
110 let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
111 $ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
112 ([],branches) branches
113 in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
115 branches :: [[PhyloGroup]]
118 $ foldl' (\mem g -> case getGroupBranchId g of
120 Just i -> mem ++ [(i,[g])] )
121 [] $ getGroupsWithLevel lvl p
129 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
130 graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
131 graphToBranches groups = Map.fromList
133 $ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
136 $ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
139 -- | To set all the PhyloBranches for a given Level in a Phylo
140 setPhyloBranches :: Level -> Phylo -> Phylo
141 setPhyloBranches lvl p = alterGroupWithLevel (\g ->
142 let bIdx = branches ! (getGroupId g)
143 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
145 --------------------------------------
146 branches :: Map PhyloGroupId Int
147 branches = graphToBranches (getGroupsWithLevel lvl p)
148 --------------------------------------
151 -- trace' bs = trace bs