2 Module : Gargantext.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
16 module Gargantext.Viz.Phylo.TemporalMatching where
18 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or, sort, (!!))
19 import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
21 import Gargantext.Prelude
22 import Gargantext.Viz.AdaptativePhylo
23 import Gargantext.Viz.Phylo.PhyloTools
25 import Prelude (floor)
26 import Control.Lens hiding (Level)
27 import Control.Parallel.Strategies (parList, rdeepseq, using)
28 import Debug.Trace (trace)
32 import qualified Data.Map as Map
33 import qualified Data.Set as Set
41 -- | To compute a jaccard similarity between two lists
42 jaccard :: [Int] -> [Int] -> Double
43 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
46 -- | Process the inverse sumLog
47 sumInvLog' :: Double -> Double -> [Double] -> Double
48 sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
51 -- | Process the sumLog
52 sumLog' :: Double -> Double -> [Double] -> Double
53 sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
56 weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
57 weightedLogJaccard' sens nbDocs diago ngrams ngrams'
58 | null ngramsInter = 0
59 | ngramsInter == ngramsUnion = 1
60 | sens == 0 = jaccard ngramsInter ngramsUnion
61 | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
62 | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
64 --------------------------------------
66 ngramsInter = intersect ngrams ngrams'
67 --------------------------------------
69 ngramsUnion = union ngrams ngrams'
70 --------------------------------------
71 diagoInter :: [Double]
72 diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
73 --------------------------------------
74 diagoUnion :: [Double]
75 diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
76 --------------------------------------
79 -- | To process the proximity between a current group and a pair of targets group
80 toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
81 toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
83 WeightedLogJaccard sens ->
84 let pairNgrams = if targetNgrams == targetNgrams'
86 else union targetNgrams targetNgrams'
87 in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
91 ------------------------
92 -- | Local Matching | --
93 ------------------------
95 findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
96 findLastPeriod fil periods = case fil of
97 ToParents -> head' "findLastPeriod" (sortOn fst periods)
98 ToChilds -> last' "findLastPeriod" (sortOn fst periods)
101 -- | To filter pairs of candidates related to old pointers periods
102 removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
103 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
104 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
105 removeOldPointers oldPointers fil thr prox prd pairs
106 | null oldPointers = pairs
107 | null (filterPointers prox thr oldPointers) =
108 let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
109 in if lastMatchedPrd == prd
111 else filter (\((id,_),(id',_)) ->
113 ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
114 || (((fst . fst . fst) id') < (fst lastMatchedPrd))
115 ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
116 || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
120 makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
121 -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
122 makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
125 else removeOldPointers oldPointers fil thr prox lastPrd
126 -- | at least on of the pair candidates should be from the last added period
127 $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
129 $ filter (\(id,ngrams) ->
130 let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
131 diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
132 in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
135 lastPrd :: PhyloPeriodId
136 lastPrd = findLastPeriod fil periods
139 filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
140 filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
143 reduceDiagos :: Map Date Cooc -> Map Int Double
144 reduceDiagos diagos = mapKeys (\(k,_) -> k)
145 $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
147 filterPointersByPeriod :: [Pointer] -> [Pointer]
148 filterPointersByPeriod pts =
149 let pts' = sortOn (fst . fst . fst) pts
150 inf = (fst . fst . fst) $ head' "filterPointersByPeriod" pts'
151 sup = (fst . fst . fst) $ last' "filterPointersByPeriod" pts'
153 $ filter (\pt -> ((fst . fst . fst) pt == inf) || ((fst . fst . fst) pt == sup)) pts'
155 phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
156 -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
157 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
158 if (null $ filterPointers proxi thr oldPointers)
159 -- | let's find new pointers
160 then if null nextPointers
162 else filterPointersByPeriod
163 $ head' "phyloGroupMatching"
164 -- | Keep only the best set of pointers grouped by proximity
165 $ groupBy (\pt pt' -> snd pt == snd pt')
166 $ reverse $ sortOn snd $ head' "pointers" nextPointers
167 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
170 nextPointers :: [[Pointer]]
171 nextPointers = take 1
173 -- | for each time frame, process the proximity on relevant pairs of targeted groups
174 $ scanl (\acc groups ->
175 let periods = nub $ map (fst . fst . fst) $ concat groups
176 nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
178 $ filterDiago diagos ([(fst . fst) id] ++ periods)
179 -- | important resize nbdocs et diago dans le make pairs
180 pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
181 in acc ++ ( filterPointers proxi thr
184 -- | process the proximity between the current group and a pair of candidates
185 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
187 then [(fst c,proximity)]
188 else [(fst c,proximity),(fst c',proximity)] ) pairs )) []
189 $ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
192 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
193 filterDocs d pds = restrictKeys d $ periodsToYears pds
195 filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
196 filterDiago diago pds = restrictKeys diago $ periodsToYears pds
199 -----------------------------
200 -- | Matching Processing | --
201 -----------------------------
204 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
205 getNextPeriods fil max' pId pIds =
207 ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
208 ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
211 getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
212 getCandidates ego targets =
214 filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
218 matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
219 matchGroupsToGroups frame periods proximity thr docs coocs groups =
220 let groups' = groupByField _phylo_groupPeriod groups
221 in foldl' (\acc prd ->
222 let -- | 1) find the parents/childs matching periods
223 periodsPar = getNextPeriods ToParents frame prd periods
224 periodsChi = getNextPeriods ToChilds frame prd periods
225 -- | 2) find the parents/childs matching candidates
226 candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
227 candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
228 -- | 3) find the parents/child number of docs by years
229 docsPar = filterDocs docs ([prd] ++ periodsPar)
230 docsChi = filterDocs docs ([prd] ++ periodsChi)
231 -- | 4) find the parents/child diago by years
232 diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
233 diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
234 -- | 5) match in parallel all the groups (egos) to their possible candidates
236 let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
237 thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
238 pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
239 thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
240 in addPointers ToChilds TemporalPointer pointersChi
241 $ addPointers ToParents TemporalPointer pointersPar ego)
242 $ findWithDefault [] prd groups'
243 egos' = egos `using` parList rdeepseq
248 -----------------------
249 -- | Phylo Quality | --
250 -----------------------
253 relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
254 relevantBranches term branches =
255 filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
257 accuracy :: Int -> [PhyloGroup] -> Double
258 accuracy x bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
259 / (fromIntegral $ length bk))
261 recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
262 recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
263 / (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
265 fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
266 fScore beta x bk bx =
267 let rec = recall x bk bx
269 in ((1 + beta ** 2) * acc * rec)
270 / (((beta ** 2) * rec + acc))
273 wk :: [PhyloGroup] -> Double
274 wk bk = fromIntegral $ length bk
277 toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
278 toPhyloQuality' beta freq branches =
283 let bks = relevantBranches i branches
284 in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks))
287 toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
288 toRecall freq branches =
294 bx = relevantBranches x branches
295 wks = sum $ map wk bx
296 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (recall x bk bx)) bx))
300 pys = sum (elems freq)
303 toAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double
304 toAccuracy freq branches =
310 bx = relevantBranches x branches
311 wks = sum $ map wk bx
312 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (accuracy x bk)) bx))
316 pys = sum (elems freq)
319 -- | here we do the average of all the local f_scores
320 toPhyloQuality :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
321 toPhyloQuality beta freq branches =
327 bx = relevantBranches x branches
328 wks = sum $ map wk bx
329 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx))
333 pys = sum (elems freq)
336 ------------------------------------
337 -- | Constant Temporal Matching | --
338 ------------------------------------
341 groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
342 groupsToBranches groups =
343 -- | run the related component algorithm
344 let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
345 $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
346 $ map (\group -> [getGroupId group]
347 ++ (map fst $ group ^. phylo_groupPeriodParents)
348 ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
349 -- | first find the related components by inside each ego's period
351 graph' = map relatedComponents egos
352 -- | then run it for the all the periods
354 $ relatedComponents $ concat (graph' `using` parList rdeepseq)
355 -- | update each group's branch id
356 in map (\(bId,ids) ->
357 let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
358 $ elems $ restrictKeys groups (Set.fromList ids)
359 in groups' `using` parList rdeepseq ) graph
362 reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
363 reduceFrequency frequency branches =
364 restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
366 updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
367 updateThr thr branches = map (\b -> map (\g ->
368 g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
371 -- | Sequentially break each branch of a phylo where
372 -- done = all the allready broken branches
373 -- ego = the current branch we want to break
374 -- rest = the branches we still have to break
375 breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
376 -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
377 breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
378 -- | 1) keep or not the new division of ego
379 let done' = done ++ (if snd ego
381 (if ((null (fst ego')) || (quality > quality'))
383 -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
384 -- <> " | " <> show(length $ fst ego) <> " groups : "
385 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
386 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
389 -- trace (" ✓ level = " <> printf "%.1f" thr <> "")
390 -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
391 -- <> " | " <> show(length $ fst ego) <> " groups : "
392 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
393 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
394 ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
397 -- | 2) if there is no more branches in rest then return else continue
400 else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
401 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
403 --------------------------------------
405 quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
406 --------------------------------------
407 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
409 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
410 $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
411 branches' = branches `using` parList rdeepseq
412 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
414 $ depthToMeta (elevation - depth) branches'
415 --------------------------------------
417 quality' = toPhyloQuality beta frequency
418 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
421 seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
422 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
423 seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
424 -- | if there is no branch to break or if seaLvl level > 1 then end
425 if (thr >= 1) || ((not . or) $ map snd branches)
428 -- | break all the possible branches at the current seaLvl level
429 let quality = toPhyloQuality beta frequency (map fst branches)
430 acc = toAccuracy frequency (map fst branches)
431 rec = toRecall frequency (map fst branches)
432 branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(β) = " <> printf "%.5f" quality
433 <> " ξ = " <> printf "%.5f" acc
434 <> " ρ = " <> printf "%.5f" rec
435 <> " branches = " <> show(length branches) <> " ↴")
436 $ breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
437 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
438 frequency' = reduceFrequency frequency (map fst branches')
439 in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
442 constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
443 constanteTemporalMatching start step phylo = updatePhyloGroups 1
444 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
445 (toPhyloHorizon phylo)
447 -- | 2) process the temporal matching by elevating seaLvl level
448 branches :: [[PhyloGroup]]
450 $ seaLevelMatching (phyloProximity $ getConfig phylo)
451 (_qua_granularity $ phyloQuality $ getConfig phylo)
452 (_qua_minBranch $ phyloQuality $ getConfig phylo)
453 (phylo ^. phylo_termFreq)
455 ((((1 - start) / step) - 1))
456 (((1 - start) / step))
457 (getTimeFrame $ timeUnit $ getConfig phylo)
459 (phylo ^. phylo_timeDocs)
460 (phylo ^. phylo_timeCooc)
462 -- | 1) for each group process an initial temporal Matching
463 -- | here we suppose that all the groups of level 1 are part of the same big branch
464 groups :: [([PhyloGroup],Bool)]
465 groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
466 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
467 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
468 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
470 (phylo ^. phylo_timeDocs)
471 (phylo ^. phylo_timeCooc)
472 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
478 toPhyloHorizon :: Phylo -> Phylo
479 toPhyloHorizon phylo =
480 let t0 = take 1 (getPeriodIds phylo)
481 groups = getGroupsFromLevelPeriods 1 t0 phylo
482 sens = getSensibility (phyloProximity $ getConfig phylo)
483 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) t0
484 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) t0
485 in phylo & phylo_horizon .~ (fromList $ map (\(g,g') ->
486 ((getGroupId g,getGroupId g'),weightedLogJaccard' sens nbDocs diago (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ listToCombi' groups)
489 --------------------------------------
490 -- | Adaptative Temporal Matching | --
491 --------------------------------------
494 thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
495 thrToMeta thr branches =
497 map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
499 depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
500 depthToMeta depth branches =
501 let break = length branches > 1
504 if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
507 reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
508 reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
511 getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
513 | isJust (m !? ( k ,k')) = m ! ( k ,k')
514 | isJust (m !? ( k',k )) = m ! ( k',k )
518 toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
519 toThreshold lvl proxiGroups =
520 let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
522 then (sort $ elems proxiGroups) !! idx
526 -- done = all the allready broken branches
527 -- ego = the current branch we want to break
528 -- rest = the branches we still have to break
529 adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
530 -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
531 -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
532 -> [([PhyloGroup],(Bool,[Double]))]
533 adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
534 -- | 1) keep or not the new division of ego
535 let done' = done ++ (if (fst . snd) ego
536 then (if ((null (fst ego')) || (quality > quality'))
538 [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
540 ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
541 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
542 else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
544 -- | uncomment let .. in for debugging
545 -- let part1 = partition (snd) done'
546 -- part2 = partition (snd) rest
547 -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
548 -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
550 -- | 2) if there is no more branches in rest then return else continue
553 else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
554 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
556 --------------------------------------
558 thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
559 --------------------------------------
561 quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
562 --------------------------------------
563 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
565 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
566 $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
567 branches' = branches `using` parList rdeepseq
568 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
570 $ depthToMeta (elevation - depth) branches'
571 --------------------------------------
573 quality' = toPhyloQuality beta frequency
574 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
577 adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
578 -> Double -> Int -> Map Int Double
579 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
580 -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
581 adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
582 -- | if there is no branch to break or if seaLvl level >= depth then end
583 if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
586 -- | break all the possible branches at the current seaLvl level
587 let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
588 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
589 frequency' = reduceFrequency frequency (map fst branches')
590 groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
591 -- thr = toThreshold depth groupsProxi
592 in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
593 <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
594 <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
596 $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
599 adaptativeTemporalMatching :: Double -> Phylo -> Phylo
600 adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
601 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
602 (toPhyloHorizon phylo)
604 -- | 2) process the temporal matching by elevating seaLvl level
605 branches :: [[PhyloGroup]]
607 $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
610 (phylo ^. phylo_groupsProxi)
611 (_qua_granularity $ phyloQuality $ getConfig phylo)
612 (_qua_minBranch $ phyloQuality $ getConfig phylo)
613 (phylo ^. phylo_termFreq)
614 (getTimeFrame $ timeUnit $ getConfig phylo)
616 (phylo ^. phylo_timeDocs)
617 (phylo ^. phylo_timeCooc)
619 -- | 1) for each group process an initial temporal Matching
620 -- | here we suppose that all the groups of level 1 are part of the same big branch
621 groups :: [([PhyloGroup],(Bool,[Double]))]
622 groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
623 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
624 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
625 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
627 (phylo ^. phylo_timeDocs)
628 (phylo ^. phylo_timeCooc)
629 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
630 --------------------------------------
632 thr = toThreshold elevation (phylo ^. phylo_groupsProxi)