2 Module : Gargantext.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
16 module Gargantext.Viz.Phylo.TemporalMatching where
18 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!))
19 import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
21 import Gargantext.Prelude
22 import Gargantext.Viz.AdaptativePhylo
23 import Gargantext.Viz.Phylo.PhyloTools
25 import Prelude (floor)
26 import Control.Lens hiding (Level)
27 import Control.Parallel.Strategies (parList, rdeepseq, using)
28 import Debug.Trace (trace)
32 import qualified Data.Map as Map
33 import qualified Data.Set as Set
41 -- | To compute a jaccard similarity between two lists
42 jaccard :: [Int] -> [Int] -> Double
43 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
46 -- | Process the inverse sumLog
47 sumInvLog' :: Double -> Double -> [Double] -> Double
48 sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
51 -- | Process the sumLog
52 sumLog' :: Double -> Double -> [Double] -> Double
53 sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
56 weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
57 weightedLogJaccard' sens nbDocs diago ngrams ngrams'
58 | null ngramsInter = 0
59 | ngramsInter == ngramsUnion = 1
60 | sens == 0 = jaccard ngramsInter ngramsUnion
61 | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
62 | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
64 --------------------------------------
66 ngramsInter = intersect ngrams ngrams'
67 --------------------------------------
69 ngramsUnion = union ngrams ngrams'
70 --------------------------------------
71 diagoInter :: [Double]
72 diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
73 --------------------------------------
74 diagoUnion :: [Double]
75 diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
76 --------------------------------------
79 -- | To process the proximity between a current group and a pair of targets group
80 toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
81 toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
83 WeightedLogJaccard sens ->
84 let pairNgrams = if targetNgrams == targetNgrams'
86 else union targetNgrams targetNgrams'
87 in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
91 ------------------------
92 -- | Local Matching | --
93 ------------------------
95 findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
96 findLastPeriod fil periods = case fil of
97 ToParents -> head' "findLastPeriod" (sortOn fst periods)
98 ToChilds -> last' "findLastPeriod" (sortOn fst periods)
101 -- | To filter pairs of candidates related to old pointers periods
102 removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
103 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
104 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
105 removeOldPointers oldPointers fil thr prox prd pairs
106 | null oldPointers = pairs
107 | null (filterPointers prox thr oldPointers) =
108 let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
109 in if lastMatchedPrd == prd
111 else filter (\((id,_),(id',_)) ->
113 ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
114 || (((fst . fst . fst) id') < (fst lastMatchedPrd))
115 ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
116 || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
120 makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
121 -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
122 makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
125 else removeOldPointers oldPointers fil thr prox lastPrd
126 -- | at least on of the pair candidates should be from the last added period
127 $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
129 $ filter (\(id,ngrams) ->
130 let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
131 diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
132 in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
135 lastPrd :: PhyloPeriodId
136 lastPrd = findLastPeriod fil periods
139 filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
140 filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
142 filterPointers' :: Proximity -> Double -> [(Pointer,[Int])] -> [(Pointer,[Int])]
143 filterPointers' proxi thr pts = filter (\((_,w),_) -> filterProximity proxi thr w) pts
146 reduceDiagos :: Map Date Cooc -> Map Int Double
147 reduceDiagos diagos = mapKeys (\(k,_) -> k)
148 $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
150 filterPointersByPeriod :: Filiation -> [(Pointer,[Int])] -> [Pointer]
151 filterPointersByPeriod fil pts =
152 let pts' = sortOn (fst . fst . fst . fst) pts
153 inf = (fst . fst . fst . fst) $ head' "filterPointersByPeriod" pts'
154 sup = (fst . fst . fst . fst) $ last' "filterPointersByPeriod" pts'
156 $ nubBy (\pt pt' -> snd pt == snd pt')
157 $ filter (\pt -> ((fst . fst . fst . fst) pt == inf) || ((fst . fst . fst . fst) pt == sup))
159 ToParents -> reverse pts'
162 phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
163 -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
164 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
165 if (null $ filterPointers proxi thr oldPointers)
166 -- | let's find new pointers
167 then if null nextPointers
169 else filterPointersByPeriod fil
170 $ head' "phyloGroupMatching"
171 -- | Keep only the best set of pointers grouped by proximity
172 $ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt')
173 $ reverse $ sortOn (snd . fst) $ head' "pointers" nextPointers
174 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
177 nextPointers :: [[(Pointer,[Int])]]
178 nextPointers = take 1
180 -- | for each time frame, process the proximity on relevant pairs of targeted groups
181 $ scanl (\acc groups ->
182 let periods = nub $ map (fst . fst . fst) $ concat groups
183 nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
185 $ filterDiago diagos ([(fst . fst) id] ++ periods)
186 -- | important resize nbdocs et diago dans le make pairs
187 pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
188 in acc ++ ( filterPointers' proxi thr
191 -- | process the proximity between the current group and a pair of candidates
192 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
193 in if ((c == c') || (snd c == snd c'))
194 then [((fst c,proximity),snd c)]
195 else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) pairs )) []
196 $ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
199 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
200 filterDocs d pds = restrictKeys d $ periodsToYears pds
202 filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
203 filterDiago diago pds = restrictKeys diago $ periodsToYears pds
206 -----------------------------
207 -- | Matching Processing | --
208 -----------------------------
211 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
212 getNextPeriods fil max' pId pIds =
214 ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
215 ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
218 getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
219 getCandidates ego targets =
221 filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
225 matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
226 matchGroupsToGroups frame periods proximity thr docs coocs groups =
227 let groups' = groupByField _phylo_groupPeriod groups
228 in foldl' (\acc prd ->
229 let -- | 1) find the parents/childs matching periods
230 periodsPar = getNextPeriods ToParents frame prd periods
231 periodsChi = getNextPeriods ToChilds frame prd periods
232 -- | 2) find the parents/childs matching candidates
233 candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
234 candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
235 -- | 3) find the parents/child number of docs by years
236 docsPar = filterDocs docs ([prd] ++ periodsPar)
237 docsChi = filterDocs docs ([prd] ++ periodsChi)
238 -- | 4) find the parents/child diago by years
239 diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
240 diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
241 -- | 5) match in parallel all the groups (egos) to their possible candidates
243 let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
244 thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
245 pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
246 thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
247 in addPointers ToChilds TemporalPointer pointersChi
248 $ addPointers ToParents TemporalPointer pointersPar ego)
249 $ findWithDefault [] prd groups'
250 egos' = egos `using` parList rdeepseq
255 -----------------------
256 -- | Phylo Quality | --
257 -----------------------
260 relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
261 relevantBranches term branches =
262 filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
264 accuracy :: Int -> [PhyloGroup] -> Double
265 accuracy x bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
266 / (fromIntegral $ length bk))
268 recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
269 recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
270 / (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
272 fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
273 fScore beta x bk bx =
274 let rec = recall x bk bx
276 in ((1 + beta ** 2) * acc * rec)
277 / (((beta ** 2) * rec + acc))
280 wk :: [PhyloGroup] -> Double
281 wk bk = fromIntegral $ length bk
284 toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
285 toPhyloQuality' beta freq branches =
290 let bks = relevantBranches i branches
291 in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks))
294 toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
295 toRecall freq branches =
301 bx = relevantBranches x branches
302 wks = sum $ map wk bx
303 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (recall x bk bx)) bx))
307 pys = sum (elems freq)
310 toAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double
311 toAccuracy freq branches =
317 bx = relevantBranches x branches
318 wks = sum $ map wk bx
319 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (accuracy x bk)) bx))
323 pys = sum (elems freq)
326 -- | here we do the average of all the local f_scores
327 toPhyloQuality :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
328 toPhyloQuality beta freq branches =
334 bx = relevantBranches x branches
335 wks = sum $ map wk bx
336 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx))
340 pys = sum (elems freq)
343 ------------------------------------
344 -- | Constant Temporal Matching | --
345 ------------------------------------
348 groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
349 groupsToBranches groups =
350 -- | run the related component algorithm
351 let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
352 $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
353 $ map (\group -> [getGroupId group]
354 ++ (map fst $ group ^. phylo_groupPeriodParents)
355 ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
356 -- | first find the related components by inside each ego's period
358 graph' = map relatedComponents egos
359 -- | then run it for the all the periods
361 $ relatedComponents $ concat (graph' `using` parList rdeepseq)
362 -- | update each group's branch id
363 in map (\(bId,ids) ->
364 let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
365 $ elems $ restrictKeys groups (Set.fromList ids)
366 in groups' `using` parList rdeepseq ) graph
369 reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
370 reduceFrequency frequency branches =
371 restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
373 updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
374 updateThr thr branches = map (\b -> map (\g ->
375 g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
378 -- | Sequentially break each branch of a phylo where
379 -- done = all the allready broken branches
380 -- ego = the current branch we want to break
381 -- rest = the branches we still have to break
382 breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
383 -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
384 breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
385 -- | 1) keep or not the new division of ego
386 let done' = done ++ (if snd ego
388 (if ((null (fst ego')) || (quality > quality'))
390 -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
391 -- <> " | " <> show(length $ fst ego) <> " groups : "
392 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
393 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
396 -- trace (" ✓ level = " <> printf "%.1f" thr <> "")
397 -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
398 -- <> " | " <> show(length $ fst ego) <> " groups : "
399 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
400 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
401 ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
404 -- | 2) if there is no more branches in rest then return else continue
407 else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
408 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
410 --------------------------------------
412 quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
413 --------------------------------------
414 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
416 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
417 $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
418 branches' = branches `using` parList rdeepseq
419 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
421 $ depthToMeta (elevation - depth) branches'
422 --------------------------------------
424 quality' = toPhyloQuality beta frequency
425 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
428 seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
429 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
430 seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
431 -- | if there is no branch to break or if seaLvl level > 1 then end
432 if (thr >= 1) || ((not . or) $ map snd branches)
435 -- | break all the possible branches at the current seaLvl level
436 let quality = toPhyloQuality beta frequency (map fst branches)
437 acc = toAccuracy frequency (map fst branches)
438 rec = toRecall frequency (map fst branches)
439 branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(β) = " <> printf "%.5f" quality
440 <> " ξ = " <> printf "%.5f" acc
441 <> " ρ = " <> printf "%.5f" rec
442 <> " branches = " <> show(length branches) <> " ↴")
443 $ breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
444 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
445 frequency' = reduceFrequency frequency (map fst branches')
446 in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
449 constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
450 constanteTemporalMatching start step phylo = updatePhyloGroups 1
451 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
452 (toPhyloHorizon phylo)
454 -- | 2) process the temporal matching by elevating seaLvl level
455 branches :: [[PhyloGroup]]
457 $ seaLevelMatching (phyloProximity $ getConfig phylo)
458 (_qua_granularity $ phyloQuality $ getConfig phylo)
459 (_qua_minBranch $ phyloQuality $ getConfig phylo)
460 (phylo ^. phylo_termFreq)
462 ((((1 - start) / step) - 1))
463 (((1 - start) / step))
464 (getTimeFrame $ timeUnit $ getConfig phylo)
466 (phylo ^. phylo_timeDocs)
467 (phylo ^. phylo_timeCooc)
469 -- | 1) for each group process an initial temporal Matching
470 -- | here we suppose that all the groups of level 1 are part of the same big branch
471 groups :: [([PhyloGroup],Bool)]
472 groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
473 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
474 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
475 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
477 (phylo ^. phylo_timeDocs)
478 (phylo ^. phylo_timeCooc)
479 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
485 toPhyloHorizon :: Phylo -> Phylo
486 toPhyloHorizon phylo =
487 let t0 = take 1 (getPeriodIds phylo)
488 groups = getGroupsFromLevelPeriods 1 t0 phylo
489 sens = getSensibility (phyloProximity $ getConfig phylo)
490 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) t0
491 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) t0
492 in phylo & phylo_horizon .~ (fromList $ map (\(g,g') ->
493 ((getGroupId g,getGroupId g'),weightedLogJaccard' sens nbDocs diago (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ listToCombi' groups)
496 --------------------------------------
497 -- | Adaptative Temporal Matching | --
498 --------------------------------------
501 thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
502 thrToMeta thr branches =
504 map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
506 depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
507 depthToMeta depth branches =
508 let break = length branches > 1
511 if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
514 reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
515 reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
518 getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
520 | isJust (m !? ( k ,k')) = m ! ( k ,k')
521 | isJust (m !? ( k',k )) = m ! ( k',k )
525 toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
526 toThreshold lvl proxiGroups =
527 let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
529 then (sort $ elems proxiGroups) !! idx
533 -- done = all the allready broken branches
534 -- ego = the current branch we want to break
535 -- rest = the branches we still have to break
536 adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
537 -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
538 -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
539 -> [([PhyloGroup],(Bool,[Double]))]
540 adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
541 -- | 1) keep or not the new division of ego
542 let done' = done ++ (if (fst . snd) ego
543 then (if ((null (fst ego')) || (quality > quality'))
545 [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
547 ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
548 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
549 else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
551 -- | uncomment let .. in for debugging
552 -- let part1 = partition (snd) done'
553 -- part2 = partition (snd) rest
554 -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
555 -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
557 -- | 2) if there is no more branches in rest then return else continue
560 else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
561 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
563 --------------------------------------
565 thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
566 --------------------------------------
568 quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
569 --------------------------------------
570 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
572 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
573 $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
574 branches' = branches `using` parList rdeepseq
575 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
577 $ depthToMeta (elevation - depth) branches'
578 --------------------------------------
580 quality' = toPhyloQuality beta frequency
581 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
584 adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
585 -> Double -> Int -> Map Int Double
586 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
587 -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
588 adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
589 -- | if there is no branch to break or if seaLvl level >= depth then end
590 if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
593 -- | break all the possible branches at the current seaLvl level
594 let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
595 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
596 frequency' = reduceFrequency frequency (map fst branches')
597 groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
598 -- thr = toThreshold depth groupsProxi
599 in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
600 <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
601 <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
603 $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
606 adaptativeTemporalMatching :: Double -> Phylo -> Phylo
607 adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
608 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
609 (toPhyloHorizon phylo)
611 -- | 2) process the temporal matching by elevating seaLvl level
612 branches :: [[PhyloGroup]]
614 $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
617 (phylo ^. phylo_groupsProxi)
618 (_qua_granularity $ phyloQuality $ getConfig phylo)
619 (_qua_minBranch $ phyloQuality $ getConfig phylo)
620 (phylo ^. phylo_termFreq)
621 (getTimeFrame $ timeUnit $ getConfig phylo)
623 (phylo ^. phylo_timeDocs)
624 (phylo ^. phylo_timeCooc)
626 -- | 1) for each group process an initial temporal Matching
627 -- | here we suppose that all the groups of level 1 are part of the same big branch
628 groups :: [([PhyloGroup],(Bool,[Double]))]
629 groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
630 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
631 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
632 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
634 (phylo ^. phylo_timeDocs)
635 (phylo ^. phylo_timeCooc)
636 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
637 --------------------------------------
639 thr = toThreshold elevation (phylo ^. phylo_groupsProxi)