]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/BranchMaker.hs
fix the diagonal issue
[gargantext.git] / src / Gargantext / Viz / Phylo / BranchMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.BranchMaker
18 where
19
20 import Control.Lens hiding (both, Level)
21 import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union)
22 import Data.Map (Map)
23 import Data.Tuple (fst, snd)
24 import Gargantext.Prelude
25 import Gargantext.Viz.Phylo
26 import Gargantext.Viz.Phylo.Metrics.Clustering
27 import Gargantext.Viz.Phylo.Aggregates.Cooc
28 import Gargantext.Viz.Phylo.Tools
29 import Gargantext.Viz.Phylo.LinkMaker
30
31 import qualified Data.Map as Map
32
33 -- import Debug.Trace (trace)
34
35 ---------------------------
36 -- | Readability links | --
37 ---------------------------
38
39 getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
40 getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
41
42 getFramedPeriod :: [PhyloGroup] -> (Date,Date)
43 getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
44
45
46 getGroupsNgrams :: [PhyloGroup] -> [Int]
47 getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
48
49
50 -- | Get the Nth most coocurent Ngrams in a list of Groups
51 getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
52 getGroupsPeaks gs nth p = getNthMostOcc nth
53 $ getSubCooc (getGroupsNgrams gs)
54 $ getCooc (getGroupsPeriods gs) p
55
56
57 areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
58 areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
59
60
61 -- | Process a Jaccard on top of two set of Branch Peaks
62 areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
63 areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
64 / ((fromIntegral . length) $ union ns ns')) >= thr
65
66
67 findSimBranches :: Int -> Double -> Int -> Phylo -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloBranchId,[PhyloGroup])]
68 findSimBranches frame thr nth p (id,gs) bs
69 = filter (\(_ ,gs') -> areTwinPeaks thr pks (getGroupsPeaks gs' nth p))
70 $ filter (\(_ ,gs') -> (not . null) $ intersect ns (getGroupsNgrams gs'))
71 $ filter (\(_ ,gs') -> areDistant prd (getFramedPeriod gs') frame)
72 $ filter (\(id',_ ) -> id /= id') bs
73 where
74 --------------------------------------
75 prd :: (Date,Date)
76 prd = getFramedPeriod gs
77 --------------------------------------
78 ns :: [Int]
79 ns = getGroupsNgrams gs
80 --------------------------------------
81 pks :: [Int]
82 pks = getGroupsPeaks gs nth p
83 --------------------------------------
84
85 findBestPointer :: Phylo -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [(PhyloGroupId,Pointer)]
86 findBestPointer p prox gs gs' = take 1
87 $ reverse
88 $ sortOn (snd . snd)
89 $ concat
90 $ map (\g -> let pts = findBestCandidates' prox gs' g p
91 in map (\pt -> (getGroupId g,pt)) pts) gs
92
93 makeBranchLinks :: Phylo -> Proximity -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloGroupId,Pointer)] -> [(PhyloGroupId,Pointer)]
94 makeBranchLinks p prox (id,gs) bs pts
95 | null bs = pts
96 | otherwise = makeBranchLinks p prox (head' "makeLink" bs) (tail bs) (pts ++ pts')
97 where
98 --------------------------------------
99 pts' :: [(PhyloGroupId,Pointer)]
100 pts' = concat $ map (\(_id,gs') -> findBestPointer p prox gs gs') candidates
101 --------------------------------------
102 candidates :: [(PhyloBranchId,[PhyloGroup])]
103 candidates = findSimBranches (getPhyloMatchingFrame p) (getPhyloReBranchThr p) (getPhyloReBranchNth p) p (id,gs) bs
104
105
106
107 linkPhyloBranches :: Level -> Proximity -> Phylo -> Phylo
108 linkPhyloBranches lvl prox p = setPhyloBranches lvl
109 $ updateGroups Descendant lvl pointers p
110 where
111 --------------------------------------
112 pointers :: Map PhyloGroupId [Pointer]
113 pointers = Map.fromList $ map (\(_id,(_id',_w)) -> (_id,[(_id',100)]))
114 $ makeBranchLinks p prox (head' "makeLink" branches) (tail branches) []
115 --------------------------------------
116 branches :: [(PhyloBranchId,[PhyloGroup])]
117 branches = sortOn (\(_id,gs) -> fst $ getFramedPeriod gs) $ getGroupsByBranches p
118 --------------------------------------
119
120
121
122
123 ------------------
124 -- | Branches | --
125 ------------------
126
127
128 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
129 graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)]
130 graphToBranches _lvl (nodes,edges) _p = concat
131 $ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs)
132 $ zip [1..]
133 $ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] []
134
135
136
137 -- | To build a graph using the parents and childs pointers
138 makeGraph :: [PhyloGroup] -> Phylo -> GroupGraph
139 makeGraph gs p = (gs,edges)
140 where
141 edges :: [GroupEdge]
142 edges = (nub . concat)
143 $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
144 ++
145 (map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) gs
146
147
148 -- | To set all the PhyloBranches for a given Level in a Phylo
149 setPhyloBranches :: Level -> Phylo -> Phylo
150 setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = (fst $ head' "branchMaker" $ filter (\b -> snd b == getGroupId g) bs)
151 in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
152 where
153 --------------------------------------
154 bs :: [(Int,PhyloGroupId)]
155 bs = graphToBranches lvl graph p
156 --------------------------------------
157 graph :: GroupGraph
158 graph = makeGraph (getGroupsWithLevel lvl p) p
159 --------------------------------------