2 Module : Gargantext.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Viz.Phylo.TemporalMatching where
14 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or, sort, (!!))
15 import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
17 import Gargantext.Prelude
18 import Gargantext.Viz.AdaptativePhylo
19 import Gargantext.Viz.Phylo.PhyloTools
21 import Prelude (floor)
22 import Control.Lens hiding (Level)
23 import Control.Parallel.Strategies (parList, rdeepseq, using)
24 import Debug.Trace (trace)
26 import qualified Data.Map as Map
27 import qualified Data.Set as Set
35 -- | To compute a jaccard similarity between two lists
36 jaccard :: [Int] -> [Int] -> Double
37 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
40 -- | Process the inverse sumLog
41 sumInvLog' :: Double -> Double -> [Double] -> Double
42 sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
45 -- | Process the sumLog
46 sumLog' :: Double -> Double -> [Double] -> Double
47 sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
50 weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
51 weightedLogJaccard' sens nbDocs diago ngrams ngrams'
52 | null ngramsInter = 0
53 | ngramsInter == ngramsUnion = 1
54 | sens == 0 = jaccard ngramsInter ngramsUnion
55 | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
56 | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
58 --------------------------------------
60 ngramsInter = intersect ngrams ngrams'
61 --------------------------------------
63 ngramsUnion = union ngrams ngrams'
64 --------------------------------------
65 diagoInter :: [Double]
66 diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
67 --------------------------------------
68 diagoUnion :: [Double]
69 diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
70 --------------------------------------
73 -- | To process the proximity between a current group and a pair of targets group
74 toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
75 toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
77 WeightedLogJaccard sens ->
78 let pairNgrams = if targetNgrams == targetNgrams'
80 else union targetNgrams targetNgrams'
81 in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
85 ------------------------
86 -- | Local Matching | --
87 ------------------------
89 findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
90 findLastPeriod fil periods = case fil of
91 ToParents -> head' "findLastPeriod" (sortOn fst periods)
92 ToChilds -> last' "findLastPeriod" (sortOn fst periods)
95 -- | To filter pairs of candidates related to old pointers periods
96 removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
97 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
98 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
99 removeOldPointers oldPointers fil thr prox prd pairs
100 | null oldPointers = pairs
101 | null (filterPointers prox thr oldPointers) =
102 let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
103 in if lastMatchedPrd == prd
105 else filter (\((id,_),(id',_)) ->
107 ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
108 || (((fst . fst . fst) id') < (fst lastMatchedPrd))
109 ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
110 || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
114 makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
115 -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
116 makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
119 else removeOldPointers oldPointers fil thr prox lastPrd
120 -- | at least on of the pair candidates should be from the last added period
121 $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
123 $ filter (\(id,ngrams) ->
124 let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
125 diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
126 in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
129 lastPrd :: PhyloPeriodId
130 lastPrd = findLastPeriod fil periods
133 filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
134 filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
137 reduceDiagos :: Map Date Cooc -> Map Int Double
138 reduceDiagos diagos = mapKeys (\(k,_) -> k)
139 $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
142 phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
143 -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
144 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
145 if (null $ filterPointers proxi thr oldPointers)
146 -- | let's find new pointers
147 then if null nextPointers
149 else head' "phyloGroupMatching"
150 -- | Keep only the best set of pointers grouped by proximity
151 $ groupBy (\pt pt' -> snd pt == snd pt')
152 $ reverse $ sortOn snd $ head' "pointers" nextPointers
153 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
156 nextPointers :: [[Pointer]]
157 nextPointers = take 1
159 -- | for each time frame, process the proximity on relevant pairs of targeted groups
160 $ scanl (\acc groups ->
161 let periods = nub $ map (fst . fst . fst) $ concat groups
162 nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
164 $ filterDiago diagos ([(fst . fst) id] ++ periods)
165 -- | important resize nbdocs et diago dans le make pairs
166 pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
167 in acc ++ ( filterPointers proxi thr
170 -- | process the proximity between the current group and a pair of candidates
171 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
173 then [(fst c,proximity)]
174 else [(fst c,proximity),(fst c',proximity)] ) pairs )) []
175 $ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
178 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
179 filterDocs d pds = restrictKeys d $ periodsToYears pds
181 filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
182 filterDiago diago pds = restrictKeys diago $ periodsToYears pds
185 -----------------------------
186 -- | Matching Processing | --
187 -----------------------------
190 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
191 getNextPeriods fil max' pId pIds =
193 ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
194 ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
197 getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
198 getCandidates ego targets =
200 filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
204 matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
205 matchGroupsToGroups frame periods proximity thr docs coocs groups =
206 let groups' = groupByField _phylo_groupPeriod groups
207 in foldl' (\acc prd ->
208 let -- | 1) find the parents/childs matching periods
209 periodsPar = getNextPeriods ToParents frame prd periods
210 periodsChi = getNextPeriods ToChilds frame prd periods
211 -- | 2) find the parents/childs matching candidates
212 candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
213 candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
214 -- | 3) find the parents/child number of docs by years
215 docsPar = filterDocs docs ([prd] ++ periodsPar)
216 docsChi = filterDocs docs ([prd] ++ periodsChi)
217 -- | 4) find the parents/child diago by years
218 diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
219 diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
220 -- | 5) match in parallel all the groups (egos) to their possible candidates
222 let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
223 thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
224 pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
225 thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
226 in addPointers ToChilds TemporalPointer pointersChi
227 $ addPointers ToParents TemporalPointer pointersPar ego)
228 $ findWithDefault [] prd groups'
229 egos' = egos `using` parList rdeepseq
234 -----------------------
235 -- | Phylo Quality | --
236 -----------------------
239 relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
240 relevantBranches term branches =
241 filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
243 fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
244 fScore beta i bk bks =
245 let recall = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
246 / (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks))
247 accuracy = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
248 / (fromIntegral $ length bk))
249 in ((1 + beta ** 2) * accuracy * recall)
250 / (((beta ** 2) * accuracy + recall))
253 wk :: [PhyloGroup] -> Double
254 wk bk = fromIntegral $ length bk
257 toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
258 toPhyloQuality' beta freq branches =
263 let bks = relevantBranches i branches
264 in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks))
268 ------------------------------------
269 -- | Constant Temporal Matching | --
270 ------------------------------------
273 groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
274 groupsToBranches groups =
275 -- | run the related component algorithm
276 let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
277 $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
278 $ map (\group -> [getGroupId group]
279 ++ (map fst $ group ^. phylo_groupPeriodParents)
280 ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
281 -- | first find the related components by inside each ego's period
283 graph' = map relatedComponents egos
284 -- | then run it for the all the periods
286 $ relatedComponents $ concat (graph' `using` parList rdeepseq)
287 -- | update each group's branch id
288 in map (\(bId,ids) ->
289 let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
290 $ elems $ restrictKeys groups (Set.fromList ids)
291 in groups' `using` parList rdeepseq ) graph
294 reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
295 reduceFrequency frequency branches =
296 restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
298 updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
299 updateThr thr branches = map (\b -> map (\g ->
300 g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
303 -- | Sequentially break each branch of a phylo where
304 -- done = all the allready broken branches
305 -- ego = the current branch we want to break
306 -- rest = the branches we still have to break
307 breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
308 -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
309 breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
310 -- | 1) keep or not the new division of ego
311 let done' = done ++ (if snd ego
313 (if ((null (fst ego')) || (quality > quality'))
315 -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
316 -- <> " | " <> show(length $ fst ego) <> " groups : "
317 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
318 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
321 -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
322 -- <> " | " <> show(length $ fst ego) <> " groups : "
323 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
324 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
325 ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
328 -- | 2) if there is no more branches in rest then return else continue
331 else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
332 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
334 --------------------------------------
336 quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
337 --------------------------------------
338 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
340 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
341 $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
342 branches' = branches `using` parList rdeepseq
343 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
345 $ depthToMeta (elevation - depth) branches'
346 --------------------------------------
348 quality' = toPhyloQuality' beta frequency
349 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
352 seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
353 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
354 seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
355 -- | if there is no branch to break or if seaLvl level > 1 then end
356 if (thr >= 1) || ((not . or) $ map snd branches)
359 -- | break all the possible branches at the current seaLvl level
360 let branches' = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
361 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
362 frequency' = reduceFrequency frequency (map fst branches')
363 in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
366 constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
367 constanteTemporalMatching start step phylo = updatePhyloGroups 1
368 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
371 -- | 2) process the temporal matching by elevating seaLvl level
372 branches :: [[PhyloGroup]]
374 $ seaLevelMatching (phyloProximity $ getConfig phylo)
375 (_qua_granularity $ phyloQuality $ getConfig phylo)
376 (_qua_minBranch $ phyloQuality $ getConfig phylo)
377 (phylo ^. phylo_termFreq)
379 ((((1 - start) / step) - 1))
380 (((1 - start) / step))
381 (getTimeFrame $ timeUnit $ getConfig phylo)
383 (phylo ^. phylo_timeDocs)
384 (phylo ^. phylo_timeCooc)
386 -- | 1) for each group process an initial temporal Matching
387 -- | here we suppose that all the groups of level 1 are part of the same big branch
388 groups :: [([PhyloGroup],Bool)]
389 groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
390 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
391 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
392 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
394 (phylo ^. phylo_timeDocs)
395 (phylo ^. phylo_timeCooc)
396 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
399 --------------------------------------
400 -- | Adaptative Temporal Matching | --
401 --------------------------------------
404 thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
405 thrToMeta thr branches =
407 map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
409 depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
410 depthToMeta depth branches =
411 let break = length branches > 1
414 if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
417 reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
418 reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
421 getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
423 | isJust (m !? ( k ,k')) = m ! ( k ,k')
424 | isJust (m !? ( k',k )) = m ! ( k',k )
428 toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
429 toThreshold lvl proxiGroups =
430 let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
432 then (sort $ elems proxiGroups) !! idx
436 -- done = all the allready broken branches
437 -- ego = the current branch we want to break
438 -- rest = the branches we still have to break
439 adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
440 -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
441 -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
442 -> [([PhyloGroup],(Bool,[Double]))]
443 adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
444 -- | 1) keep or not the new division of ego
445 let done' = done ++ (if (fst . snd) ego
446 then (if ((null (fst ego')) || (quality > quality'))
448 [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
450 ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
451 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
452 else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
454 -- | uncomment let .. in for debugging
455 -- let part1 = partition (snd) done'
456 -- part2 = partition (snd) rest
457 -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
458 -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
460 -- | 2) if there is no more branches in rest then return else continue
463 else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
464 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
466 --------------------------------------
468 thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
469 --------------------------------------
471 quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
472 --------------------------------------
473 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
475 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
476 $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
477 branches' = branches `using` parList rdeepseq
478 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
480 $ depthToMeta (elevation - depth) branches'
481 --------------------------------------
483 quality' = toPhyloQuality' beta frequency
484 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
487 adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
488 -> Double -> Int -> Map Int Double
489 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
490 -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
491 adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
492 -- | if there is no branch to break or if seaLvl level >= depth then end
493 if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
496 -- | break all the possible branches at the current seaLvl level
497 let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
498 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
499 frequency' = reduceFrequency frequency (map fst branches')
500 groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
501 -- thr = toThreshold depth groupsProxi
502 in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
503 <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
504 <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
506 $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
509 adaptativeTemporalMatching :: Double -> Phylo -> Phylo
510 adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
511 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
514 -- | 2) process the temporal matching by elevating seaLvl level
515 branches :: [[PhyloGroup]]
517 $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
520 (phylo ^. phylo_groupsProxi)
521 (_qua_granularity $ phyloQuality $ getConfig phylo)
522 (_qua_minBranch $ phyloQuality $ getConfig phylo)
523 (phylo ^. phylo_termFreq)
524 (getTimeFrame $ timeUnit $ getConfig phylo)
526 (phylo ^. phylo_timeDocs)
527 (phylo ^. phylo_timeCooc)
529 -- | 1) for each group process an initial temporal Matching
530 -- | here we suppose that all the groups of level 1 are part of the same big branch
531 groups :: [([PhyloGroup],(Bool,[Double]))]
532 groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
533 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
534 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
535 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
537 (phylo ^. phylo_timeDocs)
538 (phylo ^. phylo_timeCooc)
539 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
540 --------------------------------------
542 thr = toThreshold elevation (phylo ^. phylo_groupsProxi)