2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.BranchMaker
20 import Control.Lens hiding (both, Level)
21 import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union)
23 import Data.Tuple (fst, snd)
24 import Gargantext.Prelude
25 import Gargantext.Viz.Phylo
26 import Gargantext.Viz.Phylo.Metrics.Clustering
27 import Gargantext.Viz.Phylo.Aggregates.Cooc
28 import Gargantext.Viz.Phylo.Tools
29 import Gargantext.Viz.Phylo.LinkMaker
31 import qualified Data.Map as Map
33 -- import Debug.Trace (trace)
35 ---------------------------
36 -- | Readability links | --
37 ---------------------------
39 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
40 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
42 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
43 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
46 getGroupsNgrams :: [PhyloGroup] -> [Int]
47 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
50 getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
51 getNthMostOcc nth cooc = (nub . concat)
52 $ map (\((idx,idx'),_) -> [idx,idx'])
55 $ sortOn snd $ Map.toList cooc
58 -- | Get the Nth most coocurent Ngrams in a list of Groups
59 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
60 getGroupsPeaks gs nth p = getNthMostOcc nth
61 $ getSubCooc (getGroupsNgrams gs)
62 $ getCooc (getGroupsPeriods gs) p
64 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
65 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
68 -- | Process a Jaccard on top of two set of Branch Peaks
69 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
70 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
71 / ((fromIntegral . length) $ union ns ns')) >= thr
74 findSimBranches :: Int -> Double -> Int -> Phylo -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloBranchId,[PhyloGroup])]
75 findSimBranches frame thr nth p (id,gs) bs
76 = filter (\(_ ,gs') -> areTwinPeaks thr pks (getGroupsPeaks gs' nth p))
77 $ filter (\(_ ,gs') -> (not . null) $ intersect ns (getGroupsNgrams gs'))
78 $ filter (\(_ ,gs') -> areDistant prd (getFramedPeriod gs') frame)
79 $ filter (\(id',_ ) -> id /= id') bs
81 --------------------------------------
83 prd = getFramedPeriod gs
84 --------------------------------------
86 ns = getGroupsNgrams gs
87 --------------------------------------
89 pks = getGroupsPeaks gs nth p
90 --------------------------------------
92 findBestPointer :: Phylo -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [(PhyloGroupId,Pointer)]
93 findBestPointer p prox gs gs' = take 1
97 $ map (\g -> let pts = findBestCandidates' prox gs' g p
98 in map (\pt -> (getGroupId g,pt)) pts) gs
100 makeBranchLinks :: Phylo -> Proximity -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloGroupId,Pointer)] -> [(PhyloGroupId,Pointer)]
101 makeBranchLinks p prox (id,gs) bs pts
103 | otherwise = makeBranchLinks p prox (head' "makeLink" bs) (tail bs) (pts ++ pts')
105 --------------------------------------
106 pts' :: [(PhyloGroupId,Pointer)]
107 pts' = concat $ map (\(_id,gs') -> findBestPointer p prox gs gs') candidates
108 --------------------------------------
109 candidates :: [(PhyloBranchId,[PhyloGroup])]
110 candidates = findSimBranches (getPhyloMatchingFrame p) 0.9 4 p (id,gs) bs
114 linkPhyloBranches :: Level -> Proximity -> Phylo -> Phylo
115 linkPhyloBranches lvl prox p = setPhyloBranches lvl
116 $ updateGroups Descendant lvl pointers p
118 --------------------------------------
119 pointers :: Map PhyloGroupId [Pointer]
120 pointers = Map.fromList $ map (\(_id,(_id',_w)) -> (_id,[(_id',100)]))
121 $ makeBranchLinks p prox (head' "makeLink" branches) (tail branches) []
122 --------------------------------------
123 branches :: [(PhyloBranchId,[PhyloGroup])]
124 branches = sortOn (\(_id,gs) -> fst $ getFramedPeriod gs) $ getGroupsByBranches p
125 --------------------------------------
135 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
136 graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)]
137 graphToBranches _lvl (nodes,edges) _p = concat
138 $ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs)
140 $ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] []
144 -- | To build a graph using the parents and childs pointers
145 makeGraph :: [PhyloGroup] -> Phylo -> GroupGraph
146 makeGraph gs p = (gs,edges)
149 edges = (nub . concat)
150 $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
152 (map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) gs
155 -- | To set all the PhyloBranches for a given Level in a Phylo
156 setPhyloBranches :: Level -> Phylo -> Phylo
157 setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = (fst $ head' "branchMaker" $ filter (\b -> snd b == getGroupId g) bs)
158 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
160 --------------------------------------
161 bs :: [(Int,PhyloGroupId)]
162 bs = graphToBranches lvl graph p
163 --------------------------------------
165 graph = makeGraph (getGroupsWithLevel lvl p) p
166 --------------------------------------