2 Module : Gargantext.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
16 module Gargantext.Viz.Phylo.TemporalMatching where
18 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or, sort, (!!))
19 import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
21 import Gargantext.Prelude
22 import Gargantext.Viz.AdaptativePhylo
23 import Gargantext.Viz.Phylo.PhyloTools
25 import Prelude (floor)
26 import Control.Lens hiding (Level)
27 import Control.Parallel.Strategies (parList, rdeepseq, using)
28 import Debug.Trace (trace)
30 import qualified Data.Map as Map
31 import qualified Data.Set as Set
39 -- | To compute a jaccard similarity between two lists
40 jaccard :: [Int] -> [Int] -> Double
41 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
44 -- | Process the inverse sumLog
45 sumInvLog' :: Double -> Double -> [Double] -> Double
46 sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
49 -- | Process the sumLog
50 sumLog' :: Double -> Double -> [Double] -> Double
51 sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
54 weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
55 weightedLogJaccard' sens nbDocs diago ngrams ngrams'
56 | null ngramsInter = 0
57 | ngramsInter == ngramsUnion = 1
58 | sens == 0 = jaccard ngramsInter ngramsUnion
59 | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
60 | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
62 --------------------------------------
64 ngramsInter = intersect ngrams ngrams'
65 --------------------------------------
67 ngramsUnion = union ngrams ngrams'
68 --------------------------------------
69 diagoInter :: [Double]
70 diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
71 --------------------------------------
72 diagoUnion :: [Double]
73 diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
74 --------------------------------------
77 -- | To process the proximity between a current group and a pair of targets group
78 toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
79 toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
81 WeightedLogJaccard sens ->
82 let pairNgrams = if targetNgrams == targetNgrams'
84 else union targetNgrams targetNgrams'
85 in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
89 ------------------------
90 -- | Local Matching | --
91 ------------------------
93 findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
94 findLastPeriod fil periods = case fil of
95 ToParents -> head' "findLastPeriod" (sortOn fst periods)
96 ToChilds -> last' "findLastPeriod" (sortOn fst periods)
99 -- | To filter pairs of candidates related to old pointers periods
100 removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
101 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
102 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
103 removeOldPointers oldPointers fil thr prox prd pairs
104 | null oldPointers = pairs
105 | null (filterPointers prox thr oldPointers) =
106 let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
107 in if lastMatchedPrd == prd
109 else filter (\((id,_),(id',_)) ->
111 ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
112 || (((fst . fst . fst) id') < (fst lastMatchedPrd))
113 ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
114 || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
118 makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
119 -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
120 makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
123 else removeOldPointers oldPointers fil thr prox lastPrd
124 -- | at least on of the pair candidates should be from the last added period
125 $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
127 $ filter (\(id,ngrams) ->
128 let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
129 diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
130 in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
133 lastPrd :: PhyloPeriodId
134 lastPrd = findLastPeriod fil periods
137 filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
138 filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
141 reduceDiagos :: Map Date Cooc -> Map Int Double
142 reduceDiagos diagos = mapKeys (\(k,_) -> k)
143 $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
146 phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
147 -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
148 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
149 if (null $ filterPointers proxi thr oldPointers)
150 -- | let's find new pointers
151 then if null nextPointers
153 else head' "phyloGroupMatching"
154 -- | Keep only the best set of pointers grouped by proximity
155 $ groupBy (\pt pt' -> snd pt == snd pt')
156 $ reverse $ sortOn snd $ head' "pointers" nextPointers
157 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
160 nextPointers :: [[Pointer]]
161 nextPointers = take 1
163 -- | for each time frame, process the proximity on relevant pairs of targeted groups
164 $ scanl (\acc groups ->
165 let periods = nub $ map (fst . fst . fst) $ concat groups
166 nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
168 $ filterDiago diagos ([(fst . fst) id] ++ periods)
169 -- | important resize nbdocs et diago dans le make pairs
170 pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
171 in acc ++ ( filterPointers proxi thr
174 -- | process the proximity between the current group and a pair of candidates
175 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
177 then [(fst c,proximity)]
178 else [(fst c,proximity),(fst c',proximity)] ) pairs )) []
179 $ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
182 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
183 filterDocs d pds = restrictKeys d $ periodsToYears pds
185 filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
186 filterDiago diago pds = restrictKeys diago $ periodsToYears pds
189 -----------------------------
190 -- | Matching Processing | --
191 -----------------------------
194 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
195 getNextPeriods fil max' pId pIds =
197 ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
198 ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
201 getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
202 getCandidates ego targets =
204 filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
208 matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
209 matchGroupsToGroups frame periods proximity thr docs coocs groups =
210 let groups' = groupByField _phylo_groupPeriod groups
211 in foldl' (\acc prd ->
212 let -- | 1) find the parents/childs matching periods
213 periodsPar = getNextPeriods ToParents frame prd periods
214 periodsChi = getNextPeriods ToChilds frame prd periods
215 -- | 2) find the parents/childs matching candidates
216 candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
217 candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
218 -- | 3) find the parents/child number of docs by years
219 docsPar = filterDocs docs ([prd] ++ periodsPar)
220 docsChi = filterDocs docs ([prd] ++ periodsChi)
221 -- | 4) find the parents/child diago by years
222 diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
223 diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
224 -- | 5) match in parallel all the groups (egos) to their possible candidates
226 let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
227 thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
228 pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
229 thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
230 in addPointers ToChilds TemporalPointer pointersChi
231 $ addPointers ToParents TemporalPointer pointersPar ego)
232 $ findWithDefault [] prd groups'
233 egos' = egos `using` parList rdeepseq
238 -----------------------
239 -- | Phylo Quality | --
240 -----------------------
243 relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
244 relevantBranches term branches =
245 filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
247 fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
248 fScore beta i bk bks =
249 let recall = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
250 / (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks))
251 accuracy = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
252 / (fromIntegral $ length bk))
253 in ((1 + beta ** 2) * accuracy * recall)
254 / (((beta ** 2) * accuracy + recall))
257 wk :: [PhyloGroup] -> Double
258 wk bk = fromIntegral $ length bk
261 toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
262 toPhyloQuality' beta freq branches =
267 let bks = relevantBranches i branches
268 in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks))
272 ------------------------------------
273 -- | Constant Temporal Matching | --
274 ------------------------------------
277 groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
278 groupsToBranches groups =
279 -- | run the related component algorithm
280 let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
281 $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
282 $ map (\group -> [getGroupId group]
283 ++ (map fst $ group ^. phylo_groupPeriodParents)
284 ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
285 -- | first find the related components by inside each ego's period
287 graph' = map relatedComponents egos
288 -- | then run it for the all the periods
290 $ relatedComponents $ concat (graph' `using` parList rdeepseq)
291 -- | update each group's branch id
292 in map (\(bId,ids) ->
293 let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
294 $ elems $ restrictKeys groups (Set.fromList ids)
295 in groups' `using` parList rdeepseq ) graph
298 reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
299 reduceFrequency frequency branches =
300 restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
302 updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
303 updateThr thr branches = map (\b -> map (\g ->
304 g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
307 -- | Sequentially break each branch of a phylo where
308 -- done = all the allready broken branches
309 -- ego = the current branch we want to break
310 -- rest = the branches we still have to break
311 breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
312 -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
313 breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
314 -- | 1) keep or not the new division of ego
315 let done' = done ++ (if snd ego
317 (if ((null (fst ego')) || (quality > quality'))
319 -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
320 -- <> " | " <> show(length $ fst ego) <> " groups : "
321 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
322 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
325 -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
326 -- <> " | " <> show(length $ fst ego) <> " groups : "
327 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
328 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
329 ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
332 -- | 2) if there is no more branches in rest then return else continue
335 else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
336 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
338 --------------------------------------
340 quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
341 --------------------------------------
342 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
344 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
345 $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
346 branches' = branches `using` parList rdeepseq
347 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
349 $ depthToMeta (elevation - depth) branches'
350 --------------------------------------
352 quality' = toPhyloQuality' beta frequency
353 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
356 seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
357 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
358 seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
359 -- | if there is no branch to break or if seaLvl level > 1 then end
360 if (thr >= 1) || ((not . or) $ map snd branches)
363 -- | break all the possible branches at the current seaLvl level
364 let branches' = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
365 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
366 frequency' = reduceFrequency frequency (map fst branches')
367 in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
370 constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
371 constanteTemporalMatching start step phylo = updatePhyloGroups 1
372 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
375 -- | 2) process the temporal matching by elevating seaLvl level
376 branches :: [[PhyloGroup]]
378 $ seaLevelMatching (phyloProximity $ getConfig phylo)
379 (_qua_granularity $ phyloQuality $ getConfig phylo)
380 (_qua_minBranch $ phyloQuality $ getConfig phylo)
381 (phylo ^. phylo_termFreq)
383 (fromIntegral $ round (((1 - start) / step) - 1))
384 (fromIntegral $ round ((1 - start) / step))
385 (getTimeFrame $ timeUnit $ getConfig phylo)
387 (phylo ^. phylo_timeDocs)
388 (phylo ^. phylo_timeCooc)
390 -- | 1) for each group process an initial temporal Matching
391 -- | here we suppose that all the groups of level 1 are part of the same big branch
392 groups :: [([PhyloGroup],Bool)]
393 groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
394 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
395 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
396 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
398 (phylo ^. phylo_timeDocs)
399 (phylo ^. phylo_timeCooc)
400 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
403 --------------------------------------
404 -- | Adaptative Temporal Matching | --
405 --------------------------------------
408 thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
409 thrToMeta thr branches =
411 map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
413 depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
414 depthToMeta depth branches =
415 let break = length branches > 1
418 if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
421 reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
422 reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
425 getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
427 | isJust (m !? ( k ,k')) = m ! ( k ,k')
428 | isJust (m !? ( k',k )) = m ! ( k',k )
432 toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
433 toThreshold lvl proxiGroups =
434 let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
436 then (sort $ elems proxiGroups) !! idx
440 -- done = all the allready broken branches
441 -- ego = the current branch we want to break
442 -- rest = the branches we still have to break
443 adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
444 -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
445 -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
446 -> [([PhyloGroup],(Bool,[Double]))]
447 adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
448 -- | 1) keep or not the new division of ego
449 let done' = done ++ (if (fst . snd) ego
450 then (if ((null (fst ego')) || (quality > quality'))
452 [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
454 ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
455 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
456 else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
458 -- | uncomment let .. in for debugging
459 -- let part1 = partition (snd) done'
460 -- part2 = partition (snd) rest
461 -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
462 -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
464 -- | 2) if there is no more branches in rest then return else continue
467 else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
468 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
470 --------------------------------------
472 thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
473 --------------------------------------
475 quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
476 --------------------------------------
477 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
479 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
480 $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
481 branches' = branches `using` parList rdeepseq
482 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
484 $ depthToMeta (elevation - depth) branches'
485 --------------------------------------
487 quality' = toPhyloQuality' beta frequency
488 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
491 adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
492 -> Double -> Int -> Map Int Double
493 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
494 -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
495 adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
496 -- | if there is no branch to break or if seaLvl level >= depth then end
497 if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
500 -- | break all the possible branches at the current seaLvl level
501 let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
502 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
503 frequency' = reduceFrequency frequency (map fst branches')
504 groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
505 -- thr = toThreshold depth groupsProxi
506 in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
507 <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
508 <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
510 $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
513 adaptativeTemporalMatching :: Double -> Phylo -> Phylo
514 adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
515 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
518 -- | 2) process the temporal matching by elevating seaLvl level
519 branches :: [[PhyloGroup]]
521 $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
524 (phylo ^. phylo_groupsProxi)
525 (_qua_granularity $ phyloQuality $ getConfig phylo)
526 (_qua_minBranch $ phyloQuality $ getConfig phylo)
527 (phylo ^. phylo_termFreq)
528 (getTimeFrame $ timeUnit $ getConfig phylo)
530 (phylo ^. phylo_timeDocs)
531 (phylo ^. phylo_timeCooc)
533 -- | 1) for each group process an initial temporal Matching
534 -- | here we suppose that all the groups of level 1 are part of the same big branch
535 groups :: [([PhyloGroup],(Bool,[Double]))]
536 groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
537 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
538 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
539 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
541 (phylo ^. phylo_timeDocs)
542 (phylo ^. phylo_timeCooc)
543 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
544 --------------------------------------
546 thr = toThreshold elevation (phylo ^. phylo_groupsProxi)