2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.BranchMaker
20 import Control.Parallel.Strategies
21 import Control.Lens hiding (both, Level)
22 import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union)
24 import Data.Tuple (fst, snd)
25 import Gargantext.Prelude
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Metrics.Clustering
28 import Gargantext.Viz.Phylo.Aggregates.Cooc
29 import Gargantext.Viz.Phylo.Tools
30 import Gargantext.Viz.Phylo.LinkMaker
32 import qualified Data.Map as Map
34 -- import Debug.Trace (trace)
36 ---------------------------
37 -- | Readability links | --
38 ---------------------------
40 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
41 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
43 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
44 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
47 getGroupsNgrams :: [PhyloGroup] -> [Int]
48 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
51 -- | Get the Nth most coocurent Ngrams in a list of Groups
52 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
53 getGroupsPeaks gs nth p = getNthMostOcc nth
54 $ getSubCooc (getGroupsNgrams gs)
55 $ getCooc (getGroupsPeriods gs) p
58 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
59 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
62 -- | Process a Jaccard on top of two set of Branch Peaks
63 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
64 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
65 / ((fromIntegral . length) $ union ns ns')) >= thr
68 findSimBranches :: Int -> Double -> Int -> Phylo -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloBranchId,[PhyloGroup])]
69 findSimBranches frame thr nth p (id,gs) bs
70 = filter (\(_ ,gs') -> areTwinPeaks thr pks (getGroupsPeaks gs' nth p))
71 $ filter (\(_ ,gs') -> (not . null) $ intersect ns (getGroupsNgrams gs'))
72 $ filter (\(_ ,gs') -> areDistant prd (getFramedPeriod gs') frame)
73 $ filter (\(id',_ ) -> id /= id') bs
75 --------------------------------------
77 prd = getFramedPeriod gs
78 --------------------------------------
80 ns = getGroupsNgrams gs
81 --------------------------------------
83 pks = getGroupsPeaks gs nth p
84 --------------------------------------
87 findBestPointer :: Phylo -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [(PhyloGroupId,Pointer)]
88 findBestPointer p prox gs gs' =
89 let candidates = map (\g -> let pts = findBestCandidates' prox gs' g p
90 in map (\pt -> (getGroupId g,pt)) pts) gs
91 candidates' = candidates `using` parList rdeepseq
92 in take 1 $ reverse $ sortOn (snd . snd) $ concat candidates'
95 makeBranchLinks :: Phylo -> Proximity -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloGroupId,Pointer)] -> [(PhyloGroupId,Pointer)]
96 makeBranchLinks p prox (id,gs) bs pts
98 | otherwise = makeBranchLinks p prox (head' "makeLink" bs) (tail bs) (pts ++ pts')
100 --------------------------------------
101 pts' :: [(PhyloGroupId,Pointer)]
102 pts' = concat $ map (\(_id,gs') -> findBestPointer p prox gs gs') candidates
103 --------------------------------------
104 candidates :: [(PhyloBranchId,[PhyloGroup])]
105 candidates = findSimBranches (getPhyloMatchingFrame p) (getPhyloReBranchThr p) (getPhyloReBranchNth p) p (id,gs) bs
109 linkPhyloBranches :: Level -> Proximity -> Phylo -> Phylo
110 linkPhyloBranches lvl prox p = setPhyloBranches lvl
111 $ updateGroups Descendant lvl pointers p
113 --------------------------------------
114 pointers :: Map PhyloGroupId [Pointer]
115 pointers = Map.fromList $ map (\(_id,(_id',_w)) -> (_id,[(_id',100)]))
116 $ makeBranchLinks p prox (head' "makeLink" branches) (tail branches) []
117 --------------------------------------
118 branches :: [(PhyloBranchId,[PhyloGroup])]
119 branches = sortOn (\(_id,gs) -> fst $ getFramedPeriod gs) $ getGroupsByBranches p
120 --------------------------------------
130 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
131 graphToBranches :: [PhyloGroup] -> Phylo -> [(Int,PhyloGroupId)]
132 graphToBranches groups p = concat
133 $ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs)
136 $ map (\g -> nub $ [g] ++ (getGroupParents g p) ++ (getGroupChilds g p)) groups
139 -- | To set all the PhyloBranches for a given Level in a Phylo
140 setPhyloBranches :: Level -> Phylo -> Phylo
141 setPhyloBranches lvl p = alterGroupWithLevel (\g ->
142 let bIdx = (fst $ head' "branchMaker"
143 $ filter (\b -> snd b == getGroupId g) branches)
144 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
146 --------------------------------------
147 branches :: [(Int,PhyloGroupId)]
148 branches = graphToBranches (getGroupsWithLevel lvl p) p
149 --------------------------------------