]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/BranchMaker.hs
add rebranching to link distante branches
[gargantext.git] / src / Gargantext / Viz / Phylo / BranchMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.BranchMaker
18 where
19
20 import Control.Lens hiding (both, Level)
21 import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union)
22 import Data.Map (Map)
23 import Data.Tuple (fst, snd)
24 import Gargantext.Prelude
25 import Gargantext.Viz.Phylo
26 import Gargantext.Viz.Phylo.Metrics.Clustering
27 import Gargantext.Viz.Phylo.Aggregates.Cooc
28 import Gargantext.Viz.Phylo.Tools
29 import Gargantext.Viz.Phylo.LinkMaker
30
31 import qualified Data.Map as Map
32
33 -- import Debug.Trace (trace)
34
35 ---------------------------
36 -- | Readability links | --
37 ---------------------------
38
39 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
40 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
41
42 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
43 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
44
45
46 getGroupsNgrams :: [PhyloGroup] -> [Int]
47 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
48
49
50 getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
51 getNthMostOcc nth cooc = (nub . concat)
52 $ map (\((idx,idx'),_) -> [idx,idx'])
53 $ take (nth `div` 2)
54 $ reverse
55 $ sortOn snd $ Map.toList cooc
56
57
58 -- | Get the Nth most coocurent Ngrams in a list of Groups
59 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
60 getGroupsPeaks gs nth p = getNthMostOcc nth
61 $ getSubCooc (getGroupsNgrams gs)
62 $ getCooc (getGroupsPeriods gs) p
63
64 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
65 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
66
67
68 -- | Process a Jaccard on top of two set of Branch Peaks
69 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
70 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
71 / ((fromIntegral . length) $ union ns ns')) >= thr
72
73
74 findSimBranches :: Int -> Double -> Int -> Phylo -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloBranchId,[PhyloGroup])]
75 findSimBranches frame thr nth p (id,gs) bs
76 = filter (\(_ ,gs') -> areTwinPeaks thr pks (getGroupsPeaks gs' nth p))
77 $ filter (\(_ ,gs') -> (not . null) $ intersect ns (getGroupsNgrams gs'))
78 $ filter (\(_ ,gs') -> areDistant prd (getFramedPeriod gs') frame)
79 $ filter (\(id',_ ) -> id /= id') bs
80 where
81 --------------------------------------
82 prd :: (Date,Date)
83 prd = getFramedPeriod gs
84 --------------------------------------
85 ns :: [Int]
86 ns = getGroupsNgrams gs
87 --------------------------------------
88 pks :: [Int]
89 pks = getGroupsPeaks gs nth p
90 --------------------------------------
91
92 findBestPointer :: Phylo -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [(PhyloGroupId,Pointer)]
93 findBestPointer p prox gs gs' = take 1
94 $ reverse
95 $ sortOn (snd . snd)
96 $ concat
97 $ map (\g -> let pts = findBestCandidates' prox gs' g p
98 in map (\pt -> (getGroupId g,pt)) pts) gs
99
100 makeBranchLinks :: Phylo -> Proximity -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloGroupId,Pointer)] -> [(PhyloGroupId,Pointer)]
101 makeBranchLinks p prox (id,gs) bs pts
102 | null bs = pts
103 | otherwise = makeBranchLinks p prox (head' "makeLink" bs) (tail bs) (pts ++ pts')
104 where
105 --------------------------------------
106 pts' :: [(PhyloGroupId,Pointer)]
107 pts' = concat $ map (\(_id,gs') -> findBestPointer p prox gs gs') candidates
108 --------------------------------------
109 candidates :: [(PhyloBranchId,[PhyloGroup])]
110 candidates = findSimBranches (getPhyloMatchingFrame p) 0.9 4 p (id,gs) bs
111
112
113
114 linkPhyloBranches :: Level -> Proximity -> Phylo -> Phylo
115 linkPhyloBranches lvl prox p = setPhyloBranches lvl
116 $ updateGroups Descendant lvl pointers p
117 where
118 --------------------------------------
119 pointers :: Map PhyloGroupId [Pointer]
120 pointers = Map.fromList $ map (\(_id,(_id',_w)) -> (_id,[(_id',100)]))
121 $ makeBranchLinks p prox (head' "makeLink" branches) (tail branches) []
122 --------------------------------------
123 branches :: [(PhyloBranchId,[PhyloGroup])]
124 branches = sortOn (\(_id,gs) -> fst $ getFramedPeriod gs) $ getGroupsByBranches p
125 --------------------------------------
126
127
128
129
130 ------------------
131 -- | Branches | --
132 ------------------
133
134
135 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
136 graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)]
137 graphToBranches _lvl (nodes,edges) _p = concat
138 $ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs)
139 $ zip [1..]
140 $ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] []
141
142
143
144 -- | To build a graph using the parents and childs pointers
145 makeGraph :: [PhyloGroup] -> Phylo -> GroupGraph
146 makeGraph gs p = (gs,edges)
147 where
148 edges :: [GroupEdge]
149 edges = (nub . concat)
150 $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
151 ++
152 (map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) gs
153
154
155 -- | To set all the PhyloBranches for a given Level in a Phylo
156 setPhyloBranches :: Level -> Phylo -> Phylo
157 setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = (fst $ head' "branchMaker" $ filter (\b -> snd b == getGroupId g) bs)
158 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
159 where
160 --------------------------------------
161 bs :: [(Int,PhyloGroupId)]
162 bs = graphToBranches lvl graph p
163 --------------------------------------
164 graph :: GroupGraph
165 graph = makeGraph (getGroupsWithLevel lvl p) p
166 --------------------------------------