2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.BranchMaker
20 import Control.Lens hiding (both, Level)
21 import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union)
23 import Data.Tuple (fst, snd)
24 import Gargantext.Prelude
25 import Gargantext.Viz.Phylo
26 import Gargantext.Viz.Phylo.Metrics.Clustering
27 import Gargantext.Viz.Phylo.Aggregates.Cooc
28 import Gargantext.Viz.Phylo.Tools
29 import Gargantext.Viz.Phylo.LinkMaker
31 import qualified Data.Map as Map
33 -- import Debug.Trace (trace)
35 ---------------------------
36 -- | Readability links | --
37 ---------------------------
39 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
40 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
42 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
43 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
46 getGroupsNgrams :: [PhyloGroup] -> [Int]
47 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
50 -- | Get the Nth most coocurent Ngrams in a list of Groups
51 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
52 getGroupsPeaks gs nth p = getNthMostOcc nth
53 $ getSubCooc (getGroupsNgrams gs)
54 $ getCooc (getGroupsPeriods gs) p
57 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
58 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
61 -- | Process a Jaccard on top of two set of Branch Peaks
62 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
63 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
64 / ((fromIntegral . length) $ union ns ns')) >= thr
67 findSimBranches :: Int -> Double -> Int -> Phylo -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloBranchId,[PhyloGroup])]
68 findSimBranches frame thr nth p (id,gs) bs
69 = filter (\(_ ,gs') -> areTwinPeaks thr pks (getGroupsPeaks gs' nth p))
70 $ filter (\(_ ,gs') -> (not . null) $ intersect ns (getGroupsNgrams gs'))
71 $ filter (\(_ ,gs') -> areDistant prd (getFramedPeriod gs') frame)
72 $ filter (\(id',_ ) -> id /= id') bs
74 --------------------------------------
76 prd = getFramedPeriod gs
77 --------------------------------------
79 ns = getGroupsNgrams gs
80 --------------------------------------
82 pks = getGroupsPeaks gs nth p
83 --------------------------------------
85 findBestPointer :: Phylo -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [(PhyloGroupId,Pointer)]
86 findBestPointer p prox gs gs' = take 1
90 $ map (\g -> let pts = findBestCandidates' prox gs' g p
91 in map (\pt -> (getGroupId g,pt)) pts) gs
93 makeBranchLinks :: Phylo -> Proximity -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloGroupId,Pointer)] -> [(PhyloGroupId,Pointer)]
94 makeBranchLinks p prox (id,gs) bs pts
96 | otherwise = makeBranchLinks p prox (head' "makeLink" bs) (tail bs) (pts ++ pts')
98 --------------------------------------
99 pts' :: [(PhyloGroupId,Pointer)]
100 pts' = concat $ map (\(_id,gs') -> findBestPointer p prox gs gs') candidates
101 --------------------------------------
102 candidates :: [(PhyloBranchId,[PhyloGroup])]
103 candidates = findSimBranches (getPhyloMatchingFrame p) (getPhyloReBranchThr p) (getPhyloReBranchNth p) p (id,gs) bs
107 linkPhyloBranches :: Level -> Proximity -> Phylo -> Phylo
108 linkPhyloBranches lvl prox p = setPhyloBranches lvl
109 $ updateGroups Descendant lvl pointers p
111 --------------------------------------
112 pointers :: Map PhyloGroupId [Pointer]
113 pointers = Map.fromList $ map (\(_id,(_id',_w)) -> (_id,[(_id',100)]))
114 $ makeBranchLinks p prox (head' "makeLink" branches) (tail branches) []
115 --------------------------------------
116 branches :: [(PhyloBranchId,[PhyloGroup])]
117 branches = sortOn (\(_id,gs) -> fst $ getFramedPeriod gs) $ getGroupsByBranches p
118 --------------------------------------
128 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
129 graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)]
130 graphToBranches _lvl (nodes,edges) _p = concat
131 $ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs)
133 $ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] []
137 -- | To build a graph using the parents and childs pointers
138 makeGraph :: [PhyloGroup] -> Phylo -> GroupGraph
139 makeGraph gs p = (gs,edges)
142 edges = (nub . concat)
143 $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
145 (map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) gs
148 -- | To set all the PhyloBranches for a given Level in a Phylo
149 setPhyloBranches :: Level -> Phylo -> Phylo
150 setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = (fst $ head' "branchMaker" $ filter (\b -> snd b == getGroupId g) bs)
151 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
153 --------------------------------------
154 bs :: [(Int,PhyloGroupId)]
155 bs = graphToBranches lvl graph p
156 --------------------------------------
158 graph = makeGraph (getGroupsWithLevel lvl p) p
159 --------------------------------------