]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/BranchMaker.hs
[DB SCHEMA] adding hash_id column to nodes
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / BranchMaker.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13
14 module Gargantext.Core.Viz.Phylo.BranchMaker
15 where
16
17 import Control.Parallel.Strategies
18 import Control.Lens hiding (both, Level)
19 import Data.List (concat,nub,(++),sortOn,reverse,sort,null,intersect,union,delete)
20 import Data.Map (Map,(!), fromListWith, elems)
21 import Data.Tuple (fst, snd)
22 import Gargantext.Prelude
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Core.Viz.Phylo.Cluster
25 import Gargantext.Core.Viz.Phylo.Aggregates
26 import Gargantext.Core.Viz.Phylo.Metrics
27 import Gargantext.Core.Viz.Phylo.Tools
28 import Gargantext.Core.Viz.Phylo.LinkMaker
29
30 import qualified Data.Map as Map
31
32 -- import Debug.Trace (trace)
33
34 ---------------------------
35 -- | Readability links | --
36 ---------------------------
37
38 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
39 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
40
41 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
42 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
43
44
45 getGroupsNgrams :: [PhyloGroup] -> [Int]
46 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
47
48
49 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
50 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
51
52
53 -- | Process a Jaccard on top of two set of Branch Peaks
54 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
55 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
56 / ((fromIntegral . length) $ union ns ns')) >= thr
57
58
59 -- | Get the framing period of a branch ([[PhyloGroup]])
60 getBranchPeriod :: [PhyloGroup] -> (Date,Date)
61 getBranchPeriod gs =
62 let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
63 in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
64
65
66 -- | Get the Nth most coocurent Ngrams in a list of Groups
67 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
68 getGroupsPeaks gs nth p = getNthMostOcc nth
69 $ getSubCooc (getGroupsNgrams gs)
70 $ getCooc (getGroupsPeriods gs) p
71
72
73 -- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
74 filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
75 filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
76 (getGroupsPeaks gs (getPhyloReBranchNth p) p)
77 (getGroupsPeaks gs' (getPhyloReBranchNth p) p))
78 && ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
79 && (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
80 ) branches
81
82
83 -- | Try to connect a focused branch to other candidate branches by finding the best pointers
84 reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
85 reBranch p branch candidates =
86 let newLinks = map (\branch' ->
87 let pointers = map (\g ->
88 -- define pairs of candidates groups
89 let pairs = listToPairs
90 $ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
91 -- process the matching between the pairs and the current group
92 in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
93 in if (g2 == g3)
94 then mem ++ [(getGroupId g,(getGroupId g2,s))]
95 else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
96 ) branch
97 pointers' = pointers `using` parList rdeepseq
98 -- keep the best pointer between the focused branch and the current candidates
99 in head' "reBranch" $ reverse $ sortOn (snd . snd)
100 $ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
101 ) candidates
102 newLinks' = newLinks `using` parList rdeepseq
103 in newLinks'
104
105
106 reLinkPhyloBranches :: Level -> Phylo -> Phylo
107 reLinkPhyloBranches lvl p =
108 let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
109 $ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
110 ([],branches) branches
111 in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
112 where
113 branches :: [[PhyloGroup]]
114 branches = elems
115 $ fromListWith (++)
116 $ foldl' (\mem g -> case getGroupBranchId g of
117 Nothing -> mem
118 Just i -> mem ++ [(i,[g])] )
119 [] $ getGroupsWithLevel lvl p
120
121
122 ------------------
123 -- | Branches | --
124 ------------------
125
126
127 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
128 graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
129 graphToBranches groups = Map.fromList
130 $ concat
131 $ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
132 $ zip [1..]
133 $ relatedComp
134 $ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
135
136
137 -- | To set all the PhyloBranches for a given Level in a Phylo
138 setPhyloBranches :: Level -> Phylo -> Phylo
139 setPhyloBranches lvl p = alterGroupWithLevel (\g ->
140 let bIdx = branches ! (getGroupId g)
141 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
142 where
143 --------------------------------------
144 branches :: Map PhyloGroupId Int
145 branches = graphToBranches (getGroupsWithLevel lvl p)
146 --------------------------------------
147
148
149 -- trace' bs = trace bs