]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LinkMaker.hs
add trace to cluster and optimisation to find candidates
[gargantext.git] / src / Gargantext / Viz / Phylo / LinkMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.LinkMaker
18 where
19
20 import Control.Lens hiding (both, Level)
21 import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect)
22 import Data.Tuple.Extra
23 import Data.Map (Map,(!))
24 import Gargantext.Prelude
25 import Gargantext.Viz.Phylo
26 import Gargantext.Viz.Phylo.Tools
27 import Gargantext.Viz.Phylo.Metrics.Proximity
28 import qualified Data.List as List
29 import qualified Data.Maybe as Maybe
30 import qualified Data.Map as Map
31
32 import qualified Data.Vector.Storable as VS
33 import Debug.Trace (trace)
34 import Numeric.Statistics (percentile)
35
36
37 ------------------------------------------------------------------------
38 -- | Make links from Level to Level
39
40
41 -- | To choose a LevelLink strategy based an a given Level
42 shouldLink :: (Level,Level) -> PhyloGroup -> PhyloGroup -> Bool
43 shouldLink (lvl,_lvl) g g'
44 | lvl <= 1 = doesContainsOrd (getGroupNgrams g) (getGroupNgrams g')
45 | lvl > 1 = elem (getGroupId g) (getGroupLevelChildsId g')
46 | otherwise = panic ("[ERR][Viz.Phylo.LinkMaker.shouldLink] Level not defined")
47
48
49 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
50 linkGroupToGroups :: (Level,Level) -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
51 linkGroupToGroups (lvl,lvl') current targets
52 | lvl < lvl' = setLevelParents current
53 | lvl > lvl' = setLevelChilds current
54 | otherwise = current
55 where
56 --------------------------------------
57 setLevelChilds :: PhyloGroup -> PhyloGroup
58 setLevelChilds = over (phylo_groupLevelChilds) addPointers
59 --------------------------------------
60 setLevelParents :: PhyloGroup -> PhyloGroup
61 setLevelParents = over (phylo_groupLevelParents) addPointers
62 --------------------------------------
63 addPointers :: [Pointer] -> [Pointer]
64 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
65 if shouldLink (lvl,lvl') current target
66 then Just ((getGroupId target),1)
67 else Nothing) targets
68 --------------------------------------
69
70
71 -- | To set the LevelLink of all the PhyloGroups of a Phylo
72 setLevelLinks :: (Level,Level) -> Phylo -> Phylo
73 setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLevel g == lvl
74 then linkGroupToGroups (lvl,lvl') g (filter (\g' -> getGroupPeriod g' == getGroupPeriod g) gs')
75 else g) gs) p
76 where
77 --------------------------------------
78 gs' :: [PhyloGroup]
79 gs' = getGroupsWithLevel lvl' p
80 --------------------------------------
81
82
83 ------------------------------------------------------------------------
84 -- | Make links from Period to Period
85
86
87 -- | To apply the corresponding proximity function based on a given Proximity
88 applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
89 applyProximity prox g1 g2 = case prox of
90 -- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2),weightedLogJaccard s (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
91 -- Hamming (HammingParams _) -> ((getGroupId g2),hamming (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
92 WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getGroupCooc g1) (getGroupCooc g2))
93 Hamming (HammingParams _) -> ((getGroupId g2), hamming (getGroupCooc g1) (getGroupCooc g2))
94 _ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined")
95
96
97 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
98 getNextPeriods :: Filiation -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
99 getNextPeriods to' id l = case to' of
100 Descendant -> (tail . snd) next
101 Ascendant -> (reverse . fst) next
102 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined")
103 where
104 --------------------------------------
105 next :: ([PhyloPeriodId], [PhyloPeriodId])
106 next = splitAt idx l
107 --------------------------------------
108 idx :: Int
109 idx = case (List.elemIndex id l) of
110 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
111 Just i -> i
112 --------------------------------------
113
114
115 -- | To find the best candidates regarding a given proximity
116 findBestCandidates' :: Filiation -> Int -> Int -> Proximity -> [PhyloPeriodId] -> [PhyloGroup] -> PhyloGroup -> ([Pointer],[Double])
117 findBestCandidates' fil depth limit prox prds gs g
118 | depth > limit || null next = ([],[])
119 | (not . null) bestScores = (take 2 bestScores, map snd scores)
120 | otherwise = findBestCandidates' fil (depth + 1) limit prox prds gs g
121 where
122 --------------------------------------
123 next :: [PhyloPeriodId]
124 next = take depth prds
125 --------------------------------------
126 candidates :: [PhyloGroup]
127 candidates = filter (\g' -> elem (getGroupPeriod g') next) gs
128 --------------------------------------
129 scores :: [(PhyloGroupId, Double)]
130 scores = map (\g' -> applyProximity prox g g') candidates
131 --------------------------------------
132 bestScores :: [(PhyloGroupId, Double)]
133 bestScores = reverse
134 $ sortOn snd
135 $ filter (\(_id,score) -> case prox of
136 WeightedLogJaccard (WLJParams thr _) -> score >= thr
137 Hamming (HammingParams thr) -> score <= thr
138 Filiation -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Filiation"
139 ) scores
140 --------------------------------------
141
142
143 -- | To add some Pointer to a PhyloGroup
144 addPointers' :: Filiation -> [Pointer] -> PhyloGroup -> PhyloGroup
145 addPointers' fil pts g = g & case fil of
146 Descendant -> phylo_groupPeriodChilds %~ (++ pts)
147 Ascendant -> phylo_groupPeriodParents %~ (++ pts)
148 _ -> panic ("[ERR][Viz.Phylo.LinkMaker.addPointers] Wrong type of filiation")
149
150
151
152 -- | To update a list of pkyloGroups with some Pointers
153 updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
154 updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel g) == lvl
155 then addPointers' fil (m ! (getGroupId g)) g
156 else g ) gs) p
157
158
159
160 -- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
161 filterCandidates :: PhyloGroup -> [PhyloGroup] -> [PhyloGroup]
162 filterCandidates g gs = filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
163 $ delete g gs
164
165
166
167 -- | To apply the intertemporal matching to Phylo at a given level
168 interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
169 interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) scores
170 $ updateGroups fil lvl pointers p
171 where
172 --------------------------------------
173 pointers :: Map PhyloGroupId [Pointer]
174 pointers = Map.fromList $ map (\(id,x) -> (id,fst x)) candidates
175 --------------------------------------
176 scores :: [Double]
177 scores = sort $ concat $ map (snd . snd) candidates
178 --------------------------------------
179 candidates :: [(PhyloGroupId,([Pointer],[Double]))]
180 candidates = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g)) gs
181 --------------------------------------
182 gs :: [PhyloGroup]
183 gs = getGroupsWithLevel lvl p
184 --------------------------------------
185 prds :: [PhyloPeriodId]
186 prds = getPhyloPeriods p
187 --------------------------------------
188
189
190 ----------------
191 -- | Tracer | --
192 ----------------
193
194
195 traceMatching :: Filiation -> Level -> Double -> [Double] -> Phylo -> Phylo
196 traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered temporal Matching in Phylo" <> show (lvl) <> " :\n"
197 <> "count : " <> show (length lst) <> " potential pointers (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
198 <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
199 <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
200 <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
201 <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
202