2 Module : Gargantext.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
16 module Gargantext.Viz.Phylo.TemporalMatching where
18 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or, sort, (!!))
19 import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
21 import Gargantext.Prelude
22 import Gargantext.Viz.AdaptativePhylo
23 import Gargantext.Viz.Phylo.PhyloTools
25 import Prelude (floor)
26 import Control.Lens hiding (Level)
27 import Control.Parallel.Strategies (parList, rdeepseq, using)
28 import Debug.Trace (trace)
32 import qualified Data.Map as Map
33 import qualified Data.Set as Set
41 -- | To compute a jaccard similarity between two lists
42 jaccard :: [Int] -> [Int] -> Double
43 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
46 -- | Process the inverse sumLog
47 sumInvLog' :: Double -> Double -> [Double] -> Double
48 sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
51 -- | Process the sumLog
52 sumLog' :: Double -> Double -> [Double] -> Double
53 sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
56 weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
57 weightedLogJaccard' sens nbDocs diago ngrams ngrams'
58 | null ngramsInter = 0
59 | ngramsInter == ngramsUnion = 1
60 | sens == 0 = jaccard ngramsInter ngramsUnion
61 | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
62 | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
64 --------------------------------------
66 ngramsInter = intersect ngrams ngrams'
67 --------------------------------------
69 ngramsUnion = union ngrams ngrams'
70 --------------------------------------
71 diagoInter :: [Double]
72 diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
73 --------------------------------------
74 diagoUnion :: [Double]
75 diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
76 --------------------------------------
79 -- | To process the proximity between a current group and a pair of targets group
80 toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
81 toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
83 WeightedLogJaccard sens ->
84 let pairNgrams = if targetNgrams == targetNgrams'
86 else union targetNgrams targetNgrams'
87 in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
91 ------------------------
92 -- | Local Matching | --
93 ------------------------
95 findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
96 findLastPeriod fil periods = case fil of
97 ToParents -> head' "findLastPeriod" (sortOn fst periods)
98 ToChilds -> last' "findLastPeriod" (sortOn fst periods)
101 -- | To filter pairs of candidates related to old pointers periods
102 removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
103 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
104 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
105 removeOldPointers oldPointers fil thr prox prd pairs
106 | null oldPointers = pairs
107 | null (filterPointers prox thr oldPointers) =
108 let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
109 in if lastMatchedPrd == prd
111 else filter (\((id,_),(id',_)) ->
113 ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
114 || (((fst . fst . fst) id') < (fst lastMatchedPrd))
115 ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
116 || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
120 makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
121 -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
122 makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
125 else removeOldPointers oldPointers fil thr prox lastPrd
126 -- | at least on of the pair candidates should be from the last added period
127 $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
129 $ filter (\(id,ngrams) ->
130 let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
131 diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
132 in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
135 lastPrd :: PhyloPeriodId
136 lastPrd = findLastPeriod fil periods
139 filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
140 filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
143 reduceDiagos :: Map Date Cooc -> Map Int Double
144 reduceDiagos diagos = mapKeys (\(k,_) -> k)
145 $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
148 phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
149 -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
150 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
151 if (null $ filterPointers proxi thr oldPointers)
152 -- | let's find new pointers
153 then if null nextPointers
155 else head' "phyloGroupMatching"
156 -- | Keep only the best set of pointers grouped by proximity
157 $ groupBy (\pt pt' -> snd pt == snd pt')
158 $ reverse $ sortOn snd $ head' "pointers" nextPointers
159 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
162 nextPointers :: [[Pointer]]
163 nextPointers = take 1
165 -- | for each time frame, process the proximity on relevant pairs of targeted groups
166 $ scanl (\acc groups ->
167 let periods = nub $ map (fst . fst . fst) $ concat groups
168 nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
170 $ filterDiago diagos ([(fst . fst) id] ++ periods)
171 -- | important resize nbdocs et diago dans le make pairs
172 pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
173 in acc ++ ( filterPointers proxi thr
176 -- | process the proximity between the current group and a pair of candidates
177 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
179 then [(fst c,proximity)]
180 else [(fst c,proximity),(fst c',proximity)] ) pairs )) []
181 $ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
184 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
185 filterDocs d pds = restrictKeys d $ periodsToYears pds
187 filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
188 filterDiago diago pds = restrictKeys diago $ periodsToYears pds
191 -----------------------------
192 -- | Matching Processing | --
193 -----------------------------
196 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
197 getNextPeriods fil max' pId pIds =
199 ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
200 ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
203 getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
204 getCandidates ego targets =
206 filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
210 matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
211 matchGroupsToGroups frame periods proximity thr docs coocs groups =
212 let groups' = groupByField _phylo_groupPeriod groups
213 in foldl' (\acc prd ->
214 let -- | 1) find the parents/childs matching periods
215 periodsPar = getNextPeriods ToParents frame prd periods
216 periodsChi = getNextPeriods ToChilds frame prd periods
217 -- | 2) find the parents/childs matching candidates
218 candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
219 candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
220 -- | 3) find the parents/child number of docs by years
221 docsPar = filterDocs docs ([prd] ++ periodsPar)
222 docsChi = filterDocs docs ([prd] ++ periodsChi)
223 -- | 4) find the parents/child diago by years
224 diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
225 diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
226 -- | 5) match in parallel all the groups (egos) to their possible candidates
228 let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
229 thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
230 pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
231 thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
232 in addPointers ToChilds TemporalPointer pointersChi
233 $ addPointers ToParents TemporalPointer pointersPar ego)
234 $ findWithDefault [] prd groups'
235 egos' = egos `using` parList rdeepseq
240 -----------------------
241 -- | Phylo Quality | --
242 -----------------------
245 relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
246 relevantBranches term branches =
247 filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
249 fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
250 fScore beta x bk bx =
251 let recall = ( (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
252 / (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
253 accuracy = ( (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
254 / (fromIntegral $ length bk))
255 in ((1 + (1 / beta) ** 2) * accuracy * recall)
256 / ((((1 / beta) ** 2) * accuracy + recall))
259 wk :: [PhyloGroup] -> Double
260 wk bk = fromIntegral $ length bk
263 toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
264 toPhyloQuality' beta freq branches =
269 let bks = relevantBranches i branches
270 in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks))
273 -- | here we do the average of all the local f_scores
274 toPhyloQuality :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
275 toPhyloQuality beta freq branches =
281 bx = relevantBranches x branches
282 wks = sum $ map wk bx
283 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx))
287 pys = sum (elems freq)
290 ------------------------------------
291 -- | Constant Temporal Matching | --
292 ------------------------------------
295 groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
296 groupsToBranches groups =
297 -- | run the related component algorithm
298 let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
299 $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
300 $ map (\group -> [getGroupId group]
301 ++ (map fst $ group ^. phylo_groupPeriodParents)
302 ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
303 -- | first find the related components by inside each ego's period
305 graph' = map relatedComponents egos
306 -- | then run it for the all the periods
308 $ relatedComponents $ concat (graph' `using` parList rdeepseq)
309 -- | update each group's branch id
310 in map (\(bId,ids) ->
311 let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
312 $ elems $ restrictKeys groups (Set.fromList ids)
313 in groups' `using` parList rdeepseq ) graph
316 reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
317 reduceFrequency frequency branches =
318 restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
320 updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
321 updateThr thr branches = map (\b -> map (\g ->
322 g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
325 -- | Sequentially break each branch of a phylo where
326 -- done = all the allready broken branches
327 -- ego = the current branch we want to break
328 -- rest = the branches we still have to break
329 breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
330 -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
331 breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
332 -- | 1) keep or not the new division of ego
333 let done' = done ++ (if snd ego
335 (if ((null (fst ego')) || (quality > quality'))
337 -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
338 -- <> " | " <> show(length $ fst ego) <> " groups : "
339 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
340 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
343 -- trace (" ✓ level = " <> printf "%.1f" thr <> "")
344 -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
345 -- <> " | " <> show(length $ fst ego) <> " groups : "
346 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
347 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
348 ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
351 -- | 2) if there is no more branches in rest then return else continue
354 else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
355 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
357 --------------------------------------
359 quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
360 --------------------------------------
361 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
363 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
364 $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
365 branches' = branches `using` parList rdeepseq
366 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
368 $ depthToMeta (elevation - depth) branches'
369 --------------------------------------
371 quality' = toPhyloQuality beta frequency
372 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
375 seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
376 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
377 seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
378 -- | if there is no branch to break or if seaLvl level > 1 then end
379 if (thr >= 1) || ((not . or) $ map snd branches)
382 -- | break all the possible branches at the current seaLvl level
383 let quality = toPhyloQuality beta frequency (map fst branches)
384 branches' = trace ("↑ level = " <> printf "%.1f" thr <> " F(β) = " <> printf "%.3f" quality <> " branches = " <> show(length branches))
385 $ breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
386 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
387 frequency' = reduceFrequency frequency (map fst branches')
388 in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
391 constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
392 constanteTemporalMatching start step phylo = updatePhyloGroups 1
393 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
394 (toPhyloHorizon phylo)
396 -- | 2) process the temporal matching by elevating seaLvl level
397 branches :: [[PhyloGroup]]
399 $ seaLevelMatching (phyloProximity $ getConfig phylo)
400 (_qua_granularity $ phyloQuality $ getConfig phylo)
401 (_qua_minBranch $ phyloQuality $ getConfig phylo)
402 (phylo ^. phylo_termFreq)
404 ((((1 - start) / step) - 1))
405 (((1 - start) / step))
406 (getTimeFrame $ timeUnit $ getConfig phylo)
408 (phylo ^. phylo_timeDocs)
409 (phylo ^. phylo_timeCooc)
411 -- | 1) for each group process an initial temporal Matching
412 -- | here we suppose that all the groups of level 1 are part of the same big branch
413 groups :: [([PhyloGroup],Bool)]
414 groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
415 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
416 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
417 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
419 (phylo ^. phylo_timeDocs)
420 (phylo ^. phylo_timeCooc)
421 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
427 toPhyloHorizon :: Phylo -> Phylo
428 toPhyloHorizon phylo =
429 let t0 = take 1 (getPeriodIds phylo)
430 groups = getGroupsFromLevelPeriods 1 t0 phylo
431 sens = getSensibility (phyloProximity $ getConfig phylo)
432 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) t0
433 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) t0
434 in phylo & phylo_horizon .~ (fromList $ map (\(g,g') ->
435 ((getGroupId g,getGroupId g'),weightedLogJaccard' sens nbDocs diago (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ listToCombi' groups)
438 --------------------------------------
439 -- | Adaptative Temporal Matching | --
440 --------------------------------------
443 thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
444 thrToMeta thr branches =
446 map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
448 depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
449 depthToMeta depth branches =
450 let break = length branches > 1
453 if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
456 reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
457 reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
460 getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
462 | isJust (m !? ( k ,k')) = m ! ( k ,k')
463 | isJust (m !? ( k',k )) = m ! ( k',k )
467 toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
468 toThreshold lvl proxiGroups =
469 let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
471 then (sort $ elems proxiGroups) !! idx
475 -- done = all the allready broken branches
476 -- ego = the current branch we want to break
477 -- rest = the branches we still have to break
478 adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
479 -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
480 -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
481 -> [([PhyloGroup],(Bool,[Double]))]
482 adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
483 -- | 1) keep or not the new division of ego
484 let done' = done ++ (if (fst . snd) ego
485 then (if ((null (fst ego')) || (quality > quality'))
487 [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
489 ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
490 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
491 else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
493 -- | uncomment let .. in for debugging
494 -- let part1 = partition (snd) done'
495 -- part2 = partition (snd) rest
496 -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
497 -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
499 -- | 2) if there is no more branches in rest then return else continue
502 else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
503 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
505 --------------------------------------
507 thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
508 --------------------------------------
510 quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
511 --------------------------------------
512 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
514 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
515 $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
516 branches' = branches `using` parList rdeepseq
517 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
519 $ depthToMeta (elevation - depth) branches'
520 --------------------------------------
522 quality' = toPhyloQuality beta frequency
523 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
526 adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
527 -> Double -> Int -> Map Int Double
528 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
529 -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
530 adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
531 -- | if there is no branch to break or if seaLvl level >= depth then end
532 if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
535 -- | break all the possible branches at the current seaLvl level
536 let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
537 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
538 frequency' = reduceFrequency frequency (map fst branches')
539 groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
540 -- thr = toThreshold depth groupsProxi
541 in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
542 <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
543 <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
545 $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
548 adaptativeTemporalMatching :: Double -> Phylo -> Phylo
549 adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
550 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
551 (toPhyloHorizon phylo)
553 -- | 2) process the temporal matching by elevating seaLvl level
554 branches :: [[PhyloGroup]]
556 $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
559 (phylo ^. phylo_groupsProxi)
560 (_qua_granularity $ phyloQuality $ getConfig phylo)
561 (_qua_minBranch $ phyloQuality $ getConfig phylo)
562 (phylo ^. phylo_termFreq)
563 (getTimeFrame $ timeUnit $ getConfig phylo)
565 (phylo ^. phylo_timeDocs)
566 (phylo ^. phylo_timeCooc)
568 -- | 1) for each group process an initial temporal Matching
569 -- | here we suppose that all the groups of level 1 are part of the same big branch
570 groups :: [([PhyloGroup],(Bool,[Double]))]
571 groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
572 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
573 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
574 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
576 (phylo ^. phylo_timeDocs)
577 (phylo ^. phylo_timeCooc)
578 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
579 --------------------------------------
581 thr = toThreshold elevation (phylo ^. phylo_groupsProxi)