]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/BranchMaker.hs
add parallelism
[gargantext.git] / src / Gargantext / Viz / Phylo / BranchMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.BranchMaker
18 where
19
20 import Control.Parallel.Strategies
21 import Control.Lens hiding (both, Level)
22 import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union)
23 import Data.Map (Map)
24 import Data.Tuple (fst, snd)
25 import Gargantext.Prelude
26 import Gargantext.Viz.Phylo
27 import Gargantext.Viz.Phylo.Metrics.Clustering
28 import Gargantext.Viz.Phylo.Aggregates.Cooc
29 import Gargantext.Viz.Phylo.Tools
30 import Gargantext.Viz.Phylo.LinkMaker
31
32 import qualified Data.Map as Map
33
34 -- import Debug.Trace (trace)
35
36 ---------------------------
37 -- | Readability links | --
38 ---------------------------
39
40 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
41 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
42
43 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
44 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
45
46
47 getGroupsNgrams :: [PhyloGroup] -> [Int]
48 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
49
50
51 -- | Get the Nth most coocurent Ngrams in a list of Groups
52 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
53 getGroupsPeaks gs nth p = getNthMostOcc nth
54 $ getSubCooc (getGroupsNgrams gs)
55 $ getCooc (getGroupsPeriods gs) p
56
57
58 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
59 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
60
61
62 -- | Process a Jaccard on top of two set of Branch Peaks
63 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
64 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
65 / ((fromIntegral . length) $ union ns ns')) >= thr
66
67
68 findSimBranches :: Int -> Double -> Int -> Phylo -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloBranchId,[PhyloGroup])]
69 findSimBranches frame thr nth p (id,gs) bs
70 = filter (\(_ ,gs') -> areTwinPeaks thr pks (getGroupsPeaks gs' nth p))
71 $ filter (\(_ ,gs') -> (not . null) $ intersect ns (getGroupsNgrams gs'))
72 $ filter (\(_ ,gs') -> areDistant prd (getFramedPeriod gs') frame)
73 $ filter (\(id',_ ) -> id /= id') bs
74 where
75 --------------------------------------
76 prd :: (Date,Date)
77 prd = getFramedPeriod gs
78 --------------------------------------
79 ns :: [Int]
80 ns = getGroupsNgrams gs
81 --------------------------------------
82 pks :: [Int]
83 pks = getGroupsPeaks gs nth p
84 --------------------------------------
85
86
87 findBestPointer :: Phylo -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [(PhyloGroupId,Pointer)]
88 findBestPointer p prox gs gs' =
89 let candidates = map (\g -> let pts = findBestCandidates' prox gs' g p
90 in map (\pt -> (getGroupId g,pt)) pts) gs
91 candidates' = candidates `using` parList rdeepseq
92 in take 1 $ reverse $ sortOn (snd . snd) $ concat candidates'
93
94
95 makeBranchLinks :: Phylo -> Proximity -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloGroupId,Pointer)] -> [(PhyloGroupId,Pointer)]
96 makeBranchLinks p prox (id,gs) bs pts
97 | null bs = pts
98 | otherwise = makeBranchLinks p prox (head' "makeLink" bs) (tail bs) (pts ++ pts')
99 where
100 --------------------------------------
101 pts' :: [(PhyloGroupId,Pointer)]
102 pts' = concat $ map (\(_id,gs') -> findBestPointer p prox gs gs') candidates
103 --------------------------------------
104 candidates :: [(PhyloBranchId,[PhyloGroup])]
105 candidates = findSimBranches (getPhyloMatchingFrame p) (getPhyloReBranchThr p) (getPhyloReBranchNth p) p (id,gs) bs
106
107
108
109 linkPhyloBranches :: Level -> Proximity -> Phylo -> Phylo
110 linkPhyloBranches lvl prox p = setPhyloBranches lvl
111 $ updateGroups Descendant lvl pointers p
112 where
113 --------------------------------------
114 pointers :: Map PhyloGroupId [Pointer]
115 pointers = Map.fromList $ map (\(_id,(_id',_w)) -> (_id,[(_id',100)]))
116 $ makeBranchLinks p prox (head' "makeLink" branches) (tail branches) []
117 --------------------------------------
118 branches :: [(PhyloBranchId,[PhyloGroup])]
119 branches = sortOn (\(_id,gs) -> fst $ getFramedPeriod gs) $ getGroupsByBranches p
120 --------------------------------------
121
122
123
124
125 ------------------
126 -- | Branches | --
127 ------------------
128
129
130 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
131 graphToBranches :: [PhyloGroup] -> Phylo -> [(Int,PhyloGroupId)]
132 graphToBranches groups p = concat
133 $ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs)
134 $ zip [1..]
135 $ relatedComp
136 $ map (\g -> nub $ [g] ++ (getGroupParents g p) ++ (getGroupChilds g p)) groups
137
138
139 -- | To set all the PhyloBranches for a given Level in a Phylo
140 setPhyloBranches :: Level -> Phylo -> Phylo
141 setPhyloBranches lvl p = alterGroupWithLevel (\g ->
142 let bIdx = (fst $ head' "branchMaker"
143 $ filter (\b -> snd b == getGroupId g) branches)
144 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
145 where
146 --------------------------------------
147 branches :: [(Int,PhyloGroupId)]
148 branches = graphToBranches (getGroupsWithLevel lvl p) p
149 --------------------------------------