2 Module : Gargantext.Core.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Viz.Phylo.TemporalMatching where
14 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!))
15 import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
17 import Gargantext.Prelude
18 import Gargantext.Core.Viz.AdaptativePhylo
19 import Gargantext.Core.Viz.Phylo.PhyloTools
21 import Prelude (floor)
22 import Control.Lens hiding (Level)
23 import Control.Parallel.Strategies (parList, rdeepseq, using)
24 import Debug.Trace (trace)
28 import qualified Data.Map as Map
29 import qualified Data.Set as Set
37 -- | To compute a jaccard similarity between two lists
38 jaccard :: [Int] -> [Int] -> Double
39 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
42 -- | Process the inverse sumLog
43 sumInvLog' :: Double -> Double -> [Double] -> Double
44 sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
47 -- | Process the sumLog
48 sumLog' :: Double -> Double -> [Double] -> Double
49 sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
52 weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
53 weightedLogJaccard' sens nbDocs diago ngrams ngrams'
54 | null ngramsInter = 0
55 | ngramsInter == ngramsUnion = 1
56 | sens == 0 = jaccard ngramsInter ngramsUnion
57 | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
58 | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
60 --------------------------------------
62 ngramsInter = intersect ngrams ngrams'
63 --------------------------------------
65 ngramsUnion = union ngrams ngrams'
66 --------------------------------------
67 diagoInter :: [Double]
68 diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
69 --------------------------------------
70 diagoUnion :: [Double]
71 diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
72 --------------------------------------
75 -- | To process the proximity between a current group and a pair of targets group
76 toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
77 toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
79 WeightedLogJaccard sens ->
80 let pairNgrams = if targetNgrams == targetNgrams'
82 else union targetNgrams targetNgrams'
83 in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
87 ------------------------
88 -- | Local Matching | --
89 ------------------------
91 findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
92 findLastPeriod fil periods = case fil of
93 ToParents -> head' "findLastPeriod" (sortOn fst periods)
94 ToChilds -> last' "findLastPeriod" (sortOn fst periods)
97 -- | To filter pairs of candidates related to old pointers periods
98 removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
99 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
100 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
101 removeOldPointers oldPointers fil thr prox prd pairs
102 | null oldPointers = pairs
103 | null (filterPointers prox thr oldPointers) =
104 let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
105 in if lastMatchedPrd == prd
107 else filter (\((id,_),(id',_)) ->
109 ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
110 || (((fst . fst . fst) id') < (fst lastMatchedPrd))
111 ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
112 || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
116 makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
117 -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
118 makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
121 else removeOldPointers oldPointers fil thr prox lastPrd
122 {- at least on of the pair candidates should be from the last added period -}
123 $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
125 $ filter (\(id,ngrams) ->
126 let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
127 diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
128 in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
131 lastPrd :: PhyloPeriodId
132 lastPrd = findLastPeriod fil periods
135 filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
136 filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
138 filterPointers' :: Proximity -> Double -> [(Pointer,[Int])] -> [(Pointer,[Int])]
139 filterPointers' proxi thr pts = filter (\((_,w),_) -> filterProximity proxi thr w) pts
142 reduceDiagos :: Map Date Cooc -> Map Int Double
143 reduceDiagos diagos = mapKeys (\(k,_) -> k)
144 $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
146 filterPointersByPeriod :: Filiation -> [(Pointer,[Int])] -> [Pointer]
147 filterPointersByPeriod fil pts =
148 let pts' = sortOn (fst . fst . fst . fst) pts
149 inf = (fst . fst . fst . fst) $ head' "filterPointersByPeriod" pts'
150 sup = (fst . fst . fst . fst) $ last' "filterPointersByPeriod" pts'
152 $ nubBy (\pt pt' -> snd pt == snd pt')
153 $ filter (\pt -> ((fst . fst . fst . fst) pt == inf) || ((fst . fst . fst . fst) pt == sup))
155 ToParents -> reverse pts'
158 phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
159 -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
160 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
161 if (null $ filterPointers proxi thr oldPointers)
162 {- let's find new pointers -}
163 then if null nextPointers
165 else filterPointersByPeriod fil
166 $ head' "phyloGroupMatching"
167 -- Keep only the best set of pointers grouped by proximity
168 $ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt')
169 $ reverse $ sortOn (snd . fst) $ head' "pointers" nextPointers
170 -- Find the first time frame where at leats one pointer satisfies the proximity threshold
173 nextPointers :: [[(Pointer,[Int])]]
174 nextPointers = take 1
176 {- for each time frame, process the proximity on relevant pairs of targeted groups -}
177 $ scanl (\acc groups ->
178 let periods = nub $ map (fst . fst . fst) $ concat groups
179 nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
181 $ filterDiago diagos ([(fst . fst) id] ++ periods)
182 {- important resize nbdocs et diago dans le make pairs -}
183 pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
184 in acc ++ ( filterPointers' proxi thr
187 {- process the proximity between the current group and a pair of candidates -}
188 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
189 in if ((c == c') || (snd c == snd c'))
190 then [((fst c,proximity),snd c)]
191 else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) pairs )) []
192 $ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
195 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
196 filterDocs d pds = restrictKeys d $ periodsToYears pds
198 filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
199 filterDiago diago pds = restrictKeys diago $ periodsToYears pds
202 -----------------------------
203 -- | Matching Processing | --
204 -----------------------------
207 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
208 getNextPeriods fil max' pId pIds =
210 ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
211 ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
214 getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
215 getCandidates ego targets =
217 filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
221 matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
222 matchGroupsToGroups frame periods proximity thr docs coocs groups =
223 let groups' = groupByField _phylo_groupPeriod groups
224 in foldl' (\acc prd ->
225 let -- 1) find the parents/childs matching periods
226 periodsPar = getNextPeriods ToParents frame prd periods
227 periodsChi = getNextPeriods ToChilds frame prd periods
228 -- 2) find the parents/childs matching candidates
229 candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
230 candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
231 -- 3) find the parents/child number of docs by years
232 docsPar = filterDocs docs ([prd] ++ periodsPar)
233 docsChi = filterDocs docs ([prd] ++ periodsChi)
234 -- 4) find the parents/child diago by years
235 diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
236 diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
237 -- 5) match in parallel all the groups (egos) to their possible candidates
239 let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
240 thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
241 pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
242 thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
243 in addPointers ToChilds TemporalPointer pointersChi
244 $ addPointers ToParents TemporalPointer pointersPar ego)
245 $ findWithDefault [] prd groups'
246 egos' = egos `using` parList rdeepseq
251 -----------------------
252 -- | Phylo Quality | --
253 -----------------------
256 relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
257 relevantBranches term branches =
258 filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
260 accuracy :: Int -> [(Date,Date)] -> [PhyloGroup] -> Double
261 accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk')
262 / (fromIntegral $ length bk'))
265 bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk
267 recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
268 recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
269 / (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
271 fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double
272 fScore beta x periods bk bx =
273 let rec = recall x bk bx
274 acc = accuracy x periods bk
275 in ((1 + beta ** 2) * acc * rec)
276 / (((beta ** 2) * rec + acc))
279 wk :: [PhyloGroup] -> Double
280 wk bk = fromIntegral $ length bk
283 toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
284 toPhyloQuality' beta freq branches =
289 let bks = relevantBranches i branches
290 periods = nub $ map _phylo_groupPeriod $ concat bks
291 in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i periods bk bks)) bks))
294 toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
295 toRecall freq branches =
301 bx = relevantBranches x branches
302 wks = sum $ map wk bx
303 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (recall x bk bx)) bx))
307 pys = sum (elems freq)
310 toAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double
311 toAccuracy freq branches =
317 bx = relevantBranches x branches
318 -- | periods containing x
319 periods = nub $ map _phylo_groupPeriod $ concat bx
320 wks = sum $ map wk bx
321 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (accuracy x periods bk)) bx))
325 pys = sum (elems freq)
328 -- | here we do the average of all the local f_scores
329 toPhyloQuality :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
330 toPhyloQuality beta freq branches =
336 bx = relevantBranches x branches
337 -- | periods containing x
338 periods = nub $ map _phylo_groupPeriod $ concat bx
339 wks = sum $ map wk bx
340 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x periods bk bx)) bx))
344 pys = sum (elems freq)
346 -- 1 / nb de foundation
348 ------------------------------------
349 -- | Constant Temporal Matching | --
350 ------------------------------------
353 groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
354 groupsToBranches' groups =
355 {- run the related component algorithm -}
356 let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
357 $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
358 $ map (\group -> [getGroupId group]
359 ++ (map fst $ group ^. phylo_groupPeriodParents)
360 ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
361 -- first find the related components by inside each ego's period
363 graph' = map relatedComponents egos
364 -- then run it for the all the periods
366 $ relatedComponents $ concat (graph' `using` parList rdeepseq)
367 -- update each group's branch id
368 in map (\(bId,ids) ->
369 let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
370 $ elems $ restrictKeys groups (Set.fromList ids)
371 in groups' `using` parList rdeepseq ) graph
374 reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
375 reduceFrequency frequency branches =
376 restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
378 updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
379 updateThr thr branches = map (\b -> map (\g ->
380 g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
383 -- Sequentially break each branch of a phylo where
384 -- done = all the allready broken branches
385 -- ego = the current branch we want to break
386 -- rest = the branches we still have to break
387 breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
388 -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
389 breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
390 -- 1) keep or not the new division of ego
391 let done' = done ++ (if snd ego
393 (if ((null (fst ego')) || (quality > quality'))
395 -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
396 -- <> " | " <> show(length $ fst ego) <> " groups : "
397 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
398 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
401 -- trace (" ✓ level = " <> printf "%.1f" thr <> "")
402 -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
403 -- <> " | " <> show(length $ fst ego) <> " groups : "
404 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
405 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
406 ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
409 -- 2) if there is no more branches in rest then return else continue
412 else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
413 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
415 --------------------------------------
417 quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
418 --------------------------------------
419 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
421 let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
422 $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
423 branches' = branches `using` parList rdeepseq
424 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
426 $ depthToMeta (elevation - depth) branches'
427 --------------------------------------
429 quality' = toPhyloQuality beta frequency
430 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
433 seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
434 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
435 seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
436 -- if there is no branch to break or if seaLvl level > 1 then end
437 if (thr >= 1) || ((not . or) $ map snd branches)
440 -- break all the possible branches at the current seaLvl level
441 let quality = toPhyloQuality beta frequency (map fst branches)
442 acc = toAccuracy frequency (map fst branches)
443 rec = toRecall frequency (map fst branches)
444 branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(β) = " <> printf "%.5f" quality
445 <> " ξ = " <> printf "%.5f" acc
446 <> " ρ = " <> printf "%.5f" rec
447 <> " branches = " <> show(length branches) <> " ↴")
448 $ breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
449 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
450 frequency' = reduceFrequency frequency (map fst branches')
451 in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
454 constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
455 constanteTemporalMatching start step phylo = updatePhyloGroups 1
456 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
457 (toPhyloHorizon phylo)
459 -- 2) process the temporal matching by elevating seaLvl level
460 branches :: [[PhyloGroup]]
462 $ seaLevelMatching (phyloProximity $ getConfig phylo)
463 (_qua_granularity $ phyloQuality $ getConfig phylo)
464 (_qua_minBranch $ phyloQuality $ getConfig phylo)
465 (phylo ^. phylo_termFreq)
467 ((((1 - start) / step) - 1))
468 (((1 - start) / step))
469 (getTimeFrame $ timeUnit $ getConfig phylo)
471 (phylo ^. phylo_timeDocs)
472 (phylo ^. phylo_timeCooc)
474 -- 1) for each group process an initial temporal Matching
475 -- here we suppose that all the groups of level 1 are part of the same big branch
476 groups :: [([PhyloGroup],Bool)]
477 groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
478 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
479 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
480 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
482 (phylo ^. phylo_timeDocs)
483 (phylo ^. phylo_timeCooc)
484 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
490 toPhyloHorizon :: Phylo -> Phylo
491 toPhyloHorizon phylo =
492 let t0 = take 1 (getPeriodIds phylo)
493 groups = getGroupsFromLevelPeriods 1 t0 phylo
494 sens = getSensibility (phyloProximity $ getConfig phylo)
495 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) t0
496 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) t0
497 in phylo & phylo_horizon .~ (fromList $ map (\(g,g') ->
498 ((getGroupId g,getGroupId g'),weightedLogJaccard' sens nbDocs diago (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ listToCombi' groups)
501 --------------------------------------
502 -- | Adaptative Temporal Matching | --
503 --------------------------------------
506 thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
507 thrToMeta thr branches =
509 map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
511 depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
512 depthToMeta depth branches =
513 let break = length branches > 1
516 if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
519 reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
520 reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
523 getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
525 | isJust (m !? ( k ,k')) = m ! ( k ,k')
526 | isJust (m !? ( k',k )) = m ! ( k',k )
530 toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
531 toThreshold lvl proxiGroups =
532 let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
534 then (sort $ elems proxiGroups) !! idx
538 -- done = all the allready broken branches
539 -- ego = the current branch we want to break
540 -- rest = the branches we still have to break
541 adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
542 -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
543 -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
544 -> [([PhyloGroup],(Bool,[Double]))]
545 adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
546 -- 1) keep or not the new division of ego
547 let done' = done ++ (if (fst . snd) ego
548 then (if ((null (fst ego')) || (quality > quality'))
550 [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
552 ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
553 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
554 else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
556 -- uncomment let .. in for debugging
557 -- let part1 = partition (snd) done'
558 -- part2 = partition (snd) rest
559 -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
560 -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
562 -- 2) if there is no more branches in rest then return else continue
565 else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
566 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
568 --------------------------------------
570 thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
571 --------------------------------------
573 quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
574 --------------------------------------
575 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
577 let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
578 $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
579 branches' = branches `using` parList rdeepseq
580 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
582 $ depthToMeta (elevation - depth) branches'
583 --------------------------------------
585 quality' = toPhyloQuality beta frequency
586 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
589 adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
590 -> Double -> Int -> Map Int Double
591 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
592 -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
593 adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
594 -- if there is no branch to break or if seaLvl level >= depth then end
595 if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
598 -- break all the possible branches at the current seaLvl level
599 let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
600 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
601 frequency' = reduceFrequency frequency (map fst branches')
602 groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
603 -- thr = toThreshold depth groupsProxi
604 in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
605 <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
606 <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
608 $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
611 adaptativeTemporalMatching :: Double -> Phylo -> Phylo
612 adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
613 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
614 (toPhyloHorizon phylo)
616 -- 2) process the temporal matching by elevating seaLvl level
617 branches :: [[PhyloGroup]]
619 $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
622 (phylo ^. phylo_groupsProxi)
623 (_qua_granularity $ phyloQuality $ getConfig phylo)
624 (_qua_minBranch $ phyloQuality $ getConfig phylo)
625 (phylo ^. phylo_termFreq)
626 (getTimeFrame $ timeUnit $ getConfig phylo)
628 (phylo ^. phylo_timeDocs)
629 (phylo ^. phylo_timeCooc)
631 -- 1) for each group process an initial temporal Matching
632 -- here we suppose that all the groups of level 1 are part of the same big branch
633 groups :: [([PhyloGroup],(Bool,[Double]))]
634 groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
635 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
636 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
637 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
639 (phylo ^. phylo_timeDocs)
640 (phylo ^. phylo_timeCooc)
641 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
642 --------------------------------------
644 thr = toThreshold elevation (phylo ^. phylo_groupsProxi)