2 Module : Gargantext.Core.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Viz.Phylo.TemporalMatching where
14 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!))
15 import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
17 import Gargantext.Prelude
18 import Gargantext.Core.Viz.AdaptativePhylo
19 import Gargantext.Core.Viz.Phylo.PhyloTools
21 import Prelude (floor)
22 import Control.Lens hiding (Level)
23 import Control.Parallel.Strategies (parList, rdeepseq, using)
24 import Debug.Trace (trace)
28 import qualified Data.Map as Map
29 import qualified Data.Set as Set
37 -- | To compute a jaccard similarity between two lists
38 jaccard :: [Int] -> [Int] -> Double
39 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
42 -- | Process the inverse sumLog
43 sumInvLog' :: Double -> Double -> [Double] -> Double
44 sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
47 -- | Process the sumLog
48 sumLog' :: Double -> Double -> [Double] -> Double
49 sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
52 weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
53 weightedLogJaccard' sens nbDocs diago ngrams ngrams'
54 | null ngramsInter = 0
55 | ngramsInter == ngramsUnion = 1
56 | sens == 0 = jaccard ngramsInter ngramsUnion
57 | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
58 | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
60 --------------------------------------
62 ngramsInter = intersect ngrams ngrams'
63 --------------------------------------
65 ngramsUnion = union ngrams ngrams'
66 --------------------------------------
67 diagoInter :: [Double]
68 diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
69 --------------------------------------
70 diagoUnion :: [Double]
71 diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
72 --------------------------------------
75 -- | To process the proximity between a current group and a pair of targets group
76 toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
77 toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
79 WeightedLogJaccard sens ->
80 let pairNgrams = if targetNgrams == targetNgrams'
82 else union targetNgrams targetNgrams'
83 in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
87 ------------------------
88 -- | Local Matching | --
89 ------------------------
91 findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
92 findLastPeriod fil periods = case fil of
93 ToParents -> head' "findLastPeriod" (sortOn fst periods)
94 ToChilds -> last' "findLastPeriod" (sortOn fst periods)
97 -- | To filter pairs of candidates related to old pointers periods
98 removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
99 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
100 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
101 removeOldPointers oldPointers fil thr prox prd pairs
102 | null oldPointers = pairs
103 | null (filterPointers prox thr oldPointers) =
104 let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
105 in if lastMatchedPrd == prd
107 else filter (\((id,_),(id',_)) ->
109 ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
110 || (((fst . fst . fst) id') < (fst lastMatchedPrd))
111 ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
112 || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
116 makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
117 -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
118 makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
121 else removeOldPointers oldPointers fil thr prox lastPrd
122 {- at least on of the pair candidates should be from the last added period -}
123 $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
125 $ filter (\(id,ngrams) ->
126 let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
127 diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
128 in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
131 lastPrd :: PhyloPeriodId
132 lastPrd = findLastPeriod fil periods
135 filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
136 filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
138 filterPointers' :: Proximity -> Double -> [(Pointer,[Int])] -> [(Pointer,[Int])]
139 filterPointers' proxi thr pts = filter (\((_,w),_) -> filterProximity proxi thr w) pts
142 reduceDiagos :: Map Date Cooc -> Map Int Double
143 reduceDiagos diagos = mapKeys (\(k,_) -> k)
144 $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
146 filterPointersByPeriod :: Filiation -> [(Pointer,[Int])] -> [Pointer]
147 filterPointersByPeriod fil pts =
148 let pts' = sortOn (fst . fst . fst . fst) pts
149 inf = (fst . fst . fst . fst) $ head' "filterPointersByPeriod" pts'
150 sup = (fst . fst . fst . fst) $ last' "filterPointersByPeriod" pts'
152 $ nubBy (\pt pt' -> snd pt == snd pt')
153 $ filter (\pt -> ((fst . fst . fst . fst) pt == inf) || ((fst . fst . fst . fst) pt == sup))
155 ToParents -> reverse pts'
158 phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
159 -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
160 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
161 if (null $ filterPointers proxi thr oldPointers)
162 {- let's find new pointers -}
163 then if null nextPointers
165 else filterPointersByPeriod fil
166 $ head' "phyloGroupMatching"
167 -- Keep only the best set of pointers grouped by proximity
168 $ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt')
169 $ reverse $ sortOn (snd . fst) $ head' "pointers" nextPointers
170 -- Find the first time frame where at leats one pointer satisfies the proximity threshold
173 nextPointers :: [[(Pointer,[Int])]]
174 nextPointers = take 1
176 {- for each time frame, process the proximity on relevant pairs of targeted groups -}
177 $ scanl (\acc groups ->
178 let periods = nub $ map (fst . fst . fst) $ concat groups
179 nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
181 $ filterDiago diagos ([(fst . fst) id] ++ periods)
182 {- important resize nbdocs et diago dans le make pairs -}
183 pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
184 in acc ++ ( filterPointers' proxi thr
187 {- process the proximity between the current group and a pair of candidates -}
188 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
189 in if ((c == c') || (snd c == snd c'))
190 then [((fst c,proximity),snd c)]
191 else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) pairs )) []
192 $ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
195 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
196 filterDocs d pds = restrictKeys d $ periodsToYears pds
198 filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
199 filterDiago diago pds = restrictKeys diago $ periodsToYears pds
202 -----------------------------
203 -- | Matching Processing | --
204 -----------------------------
207 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
208 getNextPeriods fil max' pId pIds =
210 ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
211 ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
214 getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
215 getCandidates ego targets =
217 filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
221 matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
222 matchGroupsToGroups frame periods proximity thr docs coocs groups =
223 let groups' = groupByField _phylo_groupPeriod groups
224 in foldl' (\acc prd ->
225 let -- 1) find the parents/childs matching periods
226 periodsPar = getNextPeriods ToParents frame prd periods
227 periodsChi = getNextPeriods ToChilds frame prd periods
228 -- 2) find the parents/childs matching candidates
229 candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
230 candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
231 -- 3) find the parents/child number of docs by years
232 docsPar = filterDocs docs ([prd] ++ periodsPar)
233 docsChi = filterDocs docs ([prd] ++ periodsChi)
234 -- 4) find the parents/child diago by years
235 diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
236 diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
237 -- 5) match in parallel all the groups (egos) to their possible candidates
239 let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
240 thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
241 pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
242 thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
243 in addPointers ToChilds TemporalPointer pointersChi
244 $ addPointers ToParents TemporalPointer pointersPar ego)
245 $ findWithDefault [] prd groups'
246 egos' = egos `using` parList rdeepseq
251 -----------------------
252 -- | Phylo Quality | --
253 -----------------------
256 relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
257 relevantBranches term branches =
258 filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
260 accuracy :: Int -> [PhyloGroup] -> Double
261 accuracy x bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
262 / (fromIntegral $ length bk))
264 recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
265 recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
266 / (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
268 fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
269 fScore beta x bk bx =
270 let rec = recall x bk bx
272 in ((1 + beta ** 2) * acc * rec)
273 / (((beta ** 2) * rec + acc))
276 wk :: [PhyloGroup] -> Double
277 wk bk = fromIntegral $ length bk
280 toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
281 toPhyloQuality' beta freq branches =
286 let bks = relevantBranches i branches
287 in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks))
290 toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
291 toRecall freq branches =
297 bx = relevantBranches x branches
298 wks = sum $ map wk bx
299 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (recall x bk bx)) bx))
303 pys = sum (elems freq)
306 toAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double
307 toAccuracy freq branches =
313 bx = relevantBranches x branches
314 wks = sum $ map wk bx
315 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (accuracy x bk)) bx))
319 pys = sum (elems freq)
322 -- | here we do the average of all the local f_scores
323 toPhyloQuality :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
324 toPhyloQuality beta freq branches =
330 bx = relevantBranches x branches
331 wks = sum $ map wk bx
332 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx))
336 pys = sum (elems freq)
339 ------------------------------------
340 -- | Constant Temporal Matching | --
341 ------------------------------------
344 groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
345 groupsToBranches' groups =
346 -- | run the related component algorithm
347 let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
348 $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
349 $ map (\group -> [getGroupId group]
350 ++ (map fst $ group ^. phylo_groupPeriodParents)
351 ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
352 -- first find the related components by inside each ego's period
354 graph' = map relatedComponents egos
355 -- then run it for the all the periods
357 $ relatedComponents $ concat (graph' `using` parList rdeepseq)
358 -- update each group's branch id
359 in map (\(bId,ids) ->
360 let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
361 $ elems $ restrictKeys groups (Set.fromList ids)
362 in groups' `using` parList rdeepseq ) graph
365 reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
366 reduceFrequency frequency branches =
367 restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
369 updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
370 updateThr thr branches = map (\b -> map (\g ->
371 g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
374 -- Sequentially break each branch of a phylo where
375 -- done = all the allready broken branches
376 -- ego = the current branch we want to break
377 -- rest = the branches we still have to break
378 breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
379 -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
380 breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
381 -- 1) keep or not the new division of ego
382 let done' = done ++ (if snd ego
384 (if ((null (fst ego')) || (quality > quality'))
386 -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
387 -- <> " | " <> show(length $ fst ego) <> " groups : "
388 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
389 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
392 -- trace (" ✓ level = " <> printf "%.1f" thr <> "")
393 -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
394 -- <> " | " <> show(length $ fst ego) <> " groups : "
395 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
396 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
397 ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
400 -- 2) if there is no more branches in rest then return else continue
403 else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
404 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
406 --------------------------------------
408 quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
409 --------------------------------------
410 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
412 let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
413 $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
414 branches' = branches `using` parList rdeepseq
415 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
417 $ depthToMeta (elevation - depth) branches'
418 --------------------------------------
420 quality' = toPhyloQuality beta frequency
421 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
424 seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
425 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
426 seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
427 -- if there is no branch to break or if seaLvl level > 1 then end
428 if (thr >= 1) || ((not . or) $ map snd branches)
431 -- break all the possible branches at the current seaLvl level
432 let quality = toPhyloQuality beta frequency (map fst branches)
433 acc = toAccuracy frequency (map fst branches)
434 rec = toRecall frequency (map fst branches)
435 branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(β) = " <> printf "%.5f" quality
436 <> " ξ = " <> printf "%.5f" acc
437 <> " ρ = " <> printf "%.5f" rec
438 <> " branches = " <> show(length branches) <> " ↴")
439 $ breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
440 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
441 frequency' = reduceFrequency frequency (map fst branches')
442 in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
445 constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
446 constanteTemporalMatching start step phylo = updatePhyloGroups 1
447 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
448 (toPhyloHorizon phylo)
450 -- 2) process the temporal matching by elevating seaLvl level
451 branches :: [[PhyloGroup]]
453 $ seaLevelMatching (phyloProximity $ getConfig phylo)
454 (_qua_granularity $ phyloQuality $ getConfig phylo)
455 (_qua_minBranch $ phyloQuality $ getConfig phylo)
456 (phylo ^. phylo_termFreq)
458 ((((1 - start) / step) - 1))
459 (((1 - start) / step))
460 (getTimeFrame $ timeUnit $ getConfig phylo)
462 (phylo ^. phylo_timeDocs)
463 (phylo ^. phylo_timeCooc)
465 -- 1) for each group process an initial temporal Matching
466 -- here we suppose that all the groups of level 1 are part of the same big branch
467 groups :: [([PhyloGroup],Bool)]
468 groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
469 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
470 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
471 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
473 (phylo ^. phylo_timeDocs)
474 (phylo ^. phylo_timeCooc)
475 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
481 toPhyloHorizon :: Phylo -> Phylo
482 toPhyloHorizon phylo =
483 let t0 = take 1 (getPeriodIds phylo)
484 groups = getGroupsFromLevelPeriods 1 t0 phylo
485 sens = getSensibility (phyloProximity $ getConfig phylo)
486 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) t0
487 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) t0
488 in phylo & phylo_horizon .~ (fromList $ map (\(g,g') ->
489 ((getGroupId g,getGroupId g'),weightedLogJaccard' sens nbDocs diago (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ listToCombi' groups)
492 --------------------------------------
493 -- | Adaptative Temporal Matching | --
494 --------------------------------------
497 thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
498 thrToMeta thr branches =
500 map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
502 depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
503 depthToMeta depth branches =
504 let break = length branches > 1
507 if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
510 reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
511 reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
514 getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
516 | isJust (m !? ( k ,k')) = m ! ( k ,k')
517 | isJust (m !? ( k',k )) = m ! ( k',k )
521 toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
522 toThreshold lvl proxiGroups =
523 let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
525 then (sort $ elems proxiGroups) !! idx
529 -- done = all the allready broken branches
530 -- ego = the current branch we want to break
531 -- rest = the branches we still have to break
532 adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
533 -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
534 -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
535 -> [([PhyloGroup],(Bool,[Double]))]
536 adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
537 -- 1) keep or not the new division of ego
538 let done' = done ++ (if (fst . snd) ego
539 then (if ((null (fst ego')) || (quality > quality'))
541 [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
543 ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
544 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
545 else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
547 -- uncomment let .. in for debugging
548 -- let part1 = partition (snd) done'
549 -- part2 = partition (snd) rest
550 -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
551 -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
553 -- 2) if there is no more branches in rest then return else continue
556 else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
557 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
559 --------------------------------------
561 thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
562 --------------------------------------
564 quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
565 --------------------------------------
566 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
568 let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
569 $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
570 branches' = branches `using` parList rdeepseq
571 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
573 $ depthToMeta (elevation - depth) branches'
574 --------------------------------------
576 quality' = toPhyloQuality beta frequency
577 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
580 adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
581 -> Double -> Int -> Map Int Double
582 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
583 -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
584 adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
585 -- if there is no branch to break or if seaLvl level >= depth then end
586 if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
589 -- break all the possible branches at the current seaLvl level
590 let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
591 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
592 frequency' = reduceFrequency frequency (map fst branches')
593 groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
594 -- thr = toThreshold depth groupsProxi
595 in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
596 <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
597 <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
599 $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
602 adaptativeTemporalMatching :: Double -> Phylo -> Phylo
603 adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
604 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
605 (toPhyloHorizon phylo)
607 -- 2) process the temporal matching by elevating seaLvl level
608 branches :: [[PhyloGroup]]
610 $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
613 (phylo ^. phylo_groupsProxi)
614 (_qua_granularity $ phyloQuality $ getConfig phylo)
615 (_qua_minBranch $ phyloQuality $ getConfig phylo)
616 (phylo ^. phylo_termFreq)
617 (getTimeFrame $ timeUnit $ getConfig phylo)
619 (phylo ^. phylo_timeDocs)
620 (phylo ^. phylo_timeCooc)
622 -- 1) for each group process an initial temporal Matching
623 -- here we suppose that all the groups of level 1 are part of the same big branch
624 groups :: [([PhyloGroup],(Bool,[Double]))]
625 groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
626 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
627 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
628 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
630 (phylo ^. phylo_timeDocs)
631 (phylo ^. phylo_timeCooc)
632 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
633 --------------------------------------
635 thr = toThreshold elevation (phylo ^. phylo_groupsProxi)