]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/BranchMaker.hs
Merge branch 'dev-phylo' into dev-merge
[gargantext.git] / src / Gargantext / Viz / Phylo / BranchMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.BranchMaker
18 where
19
20 import Control.Parallel.Strategies
21 import Control.Lens hiding (both, Level)
22 import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union,delete)
23 import Data.Map (Map,(!), fromListWith, elems)
24 import Data.Tuple (fst, snd)
25 import Gargantext.Prelude
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Metrics.Clustering
28 import Gargantext.Viz.Phylo.Aggregates.Cooc
29 import Gargantext.Viz.Phylo.Tools
30 import Gargantext.Viz.Phylo.LinkMaker
31
32 import qualified Data.Map as Map
33
34 -- import Debug.Trace (trace)
35
36 ---------------------------
37 -- | Readability links | --
38 ---------------------------
39
40 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
41 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
42
43 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
44 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
45
46
47 getGroupsNgrams :: [PhyloGroup] -> [Int]
48 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
49
50
51 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
52 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
53
54
55 -- | Process a Jaccard on top of two set of Branch Peaks
56 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
57 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
58 / ((fromIntegral . length) $ union ns ns')) >= thr
59
60
61 -- | Get the framing period of a branch ([[PhyloGroup]])
62 getBranchPeriod :: [PhyloGroup] -> (Date,Date)
63 getBranchPeriod gs =
64 let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
65 in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
66
67
68 -- | Get the Nth most coocurent Ngrams in a list of Groups
69 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
70 getGroupsPeaks gs nth p = getNthMostOcc nth
71 $ getSubCooc (getGroupsNgrams gs)
72 $ getCooc (getGroupsPeriods gs) p
73
74
75 -- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
76 filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
77 filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
78 (getGroupsPeaks gs (getPhyloReBranchNth p) p)
79 (getGroupsPeaks gs' (getPhyloReBranchNth p) p))
80 && ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
81 && (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
82 ) branches
83
84
85 -- | Try to connect a focused branch to other candidate branches by finding the best pointers
86 reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
87 reBranch p branch candidates =
88 let newLinks = map (\branch' ->
89 let pointers = map (\g ->
90 -- define pairs of candidates groups
91 let pairs = listToPairs
92 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
93 -- process the matching between the pairs and the current group
94 in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
95 in if (g2 == g3)
96 then mem ++ [(getGroupId g,(getGroupId g2,s))]
97 else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
98 ) branch
99 pointers' = pointers `using` parList rdeepseq
100 -- keep the best pointer between the focused branch and the current candidates
101 in head' "reBranch" $ reverse $ sortOn (snd . snd)
102 $ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
103 ) candidates
104 newLinks' = newLinks `using` parList rdeepseq
105 in newLinks'
106
107
108 reLinkPhyloBranches :: Level -> Phylo -> Phylo
109 reLinkPhyloBranches lvl p =
110 let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
111 $ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
112 ([],branches) branches
113 in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
114 where
115 branches :: [[PhyloGroup]]
116 branches = elems
117 $ fromListWith (++)
118 $ foldl' (\mem g -> case getGroupBranchId g of
119 Nothing -> mem
120 Just i -> mem ++ [(i,[g])] )
121 [] $ getGroupsWithLevel lvl p
122
123
124 ------------------
125 -- | Branches | --
126 ------------------
127
128
129 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
130 graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
131 graphToBranches groups = Map.fromList
132 $ concat
133 $ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
134 $ zip [1..]
135 $ relatedComp
136 $ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
137
138
139 -- | To set all the PhyloBranches for a given Level in a Phylo
140 setPhyloBranches :: Level -> Phylo -> Phylo
141 setPhyloBranches lvl p = alterGroupWithLevel (\g ->
142 let bIdx = branches ! (getGroupId g)
143 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
144 where
145 --------------------------------------
146 branches :: Map PhyloGroupId Int
147 branches = graphToBranches (getGroupsWithLevel lvl p)
148 --------------------------------------
149
150
151 -- trace' bs = trace bs