]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/BranchMaker.hs
add a parameter in the config for finding the ancestors or not
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / BranchMaker.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13
14 module Gargantext.Core.Viz.Phylo.BranchMaker
15 where
16
17 import Control.Parallel.Strategies
18 import Control.Lens hiding (both, Level)
19 import Data.List (concat,nub,(++),sortOn,sort,null,intersect,union,delete)
20 import Data.Map (Map,(!), fromListWith, elems)
21 import Gargantext.Prelude
22 import Gargantext.Core.Viz.Phylo
23 import Gargantext.Core.Viz.Phylo.Cluster
24 import Gargantext.Core.Viz.Phylo.Aggregates
25 import Gargantext.Core.Viz.Phylo.Metrics
26 import Gargantext.Core.Viz.Phylo.Tools
27 import Gargantext.Core.Viz.Phylo.LinkMaker
28
29 import qualified Data.Map as Map
30
31 -- import Debug.Trace (trace)
32
33 ---------------------------
34 -- | Readability links | --
35 ---------------------------
36
37 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
38 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
39
40 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
41 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
42
43
44 getGroupsNgrams :: [PhyloGroup] -> [Int]
45 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
46
47
48 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
49 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
50
51
52 -- | Process a Jaccard on top of two set of Branch Peaks
53 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
54 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
55 / ((fromIntegral . length) $ union ns ns')) >= thr
56
57
58 -- | Get the framing period of a branch ([[PhyloGroup]])
59 getBranchPeriod :: [PhyloGroup] -> (Date,Date)
60 getBranchPeriod gs =
61 let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
62 in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
63
64
65 -- | Get the Nth most coocurent Ngrams in a list of Groups
66 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
67 getGroupsPeaks gs nth p = getNthMostOcc nth
68 $ getSubCooc (getGroupsNgrams gs)
69 $ getCooc (getGroupsPeriods gs) p
70
71
72 -- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
73 filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
74 filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
75 (getGroupsPeaks gs (getPhyloReBranchNth p) p)
76 (getGroupsPeaks gs' (getPhyloReBranchNth p) p))
77 && ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
78 && (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
79 ) branches
80
81
82 -- | Try to connect a focused branch to other candidate branches by finding the best pointers
83 reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
84 reBranch p branch candidates =
85 let newLinks = map (\branch' ->
86 let pointers = map (\g ->
87 -- define pairs of candidates groups
88 let pairs = listToPairs
89 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
90 -- process the matching between the pairs and the current group
91 in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
92 in if (g2 == g3)
93 then mem ++ [(getGroupId g,(getGroupId g2,s))]
94 else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
95 ) branch
96 pointers' = pointers `using` parList rdeepseq
97 -- keep the best pointer between the focused branch and the current candidates
98 in head' "reBranch" $ reverse $ sortOn (snd . snd)
99 $ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
100 ) candidates
101 newLinks' = newLinks `using` parList rdeepseq
102 in newLinks'
103
104
105 reLinkPhyloBranches :: Level -> Phylo -> Phylo
106 reLinkPhyloBranches lvl p =
107 let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
108 $ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
109 ([],branches) branches
110 in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
111 where
112 branches :: [[PhyloGroup]]
113 branches = elems
114 $ fromListWith (++)
115 $ foldl' (\mem g -> case getGroupBranchId g of
116 Nothing -> mem
117 Just i -> mem ++ [(i,[g])] )
118 [] $ getGroupsWithLevel lvl p
119
120
121 ------------------
122 -- | Branches | --
123 ------------------
124
125
126 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
127 graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
128 graphToBranches groups = Map.fromList
129 $ concat
130 $ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
131 $ zip [1..]
132 $ relatedComp
133 $ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
134
135
136 -- | To set all the PhyloBranches for a given Level in a Phylo
137 setPhyloBranches :: Level -> Phylo -> Phylo
138 setPhyloBranches lvl p = alterGroupWithLevel (\g ->
139 let bIdx = branches ! (getGroupId g)
140 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
141 where
142 --------------------------------------
143 branches :: Map PhyloGroupId Int
144 branches = graphToBranches (getGroupsWithLevel lvl p)
145 --------------------------------------
146
147
148 -- trace' bs = trace bs