2 Module : Gargantext.Core.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Viz.Phylo.TemporalMatching where
14 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!))
15 import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
17 import Gargantext.Prelude
18 import Gargantext.Core.Viz.AdaptativePhylo
19 import Gargantext.Core.Viz.Phylo.PhyloTools
21 import Prelude (floor,tan,pi)
22 import Control.Lens hiding (Level)
23 import Control.Parallel.Strategies (parList, rdeepseq, using)
24 import Debug.Trace (trace)
28 import qualified Data.Map as Map
29 import qualified Data.Set as Set
30 import qualified Data.Vector as Vector
39 -- | To compute a jaccard similarity between two lists
40 jaccard :: [Int] -> [Int] -> Double
41 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
44 -- | Process the inverse sumLog
45 sumInvLog' :: Double -> Double -> [Double] -> Double
46 sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
49 -- | Process the sumLog
50 sumLog' :: Double -> Double -> [Double] -> Double
51 sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
54 weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
55 weightedLogJaccard' sens nbDocs diago ngrams ngrams'
56 | null ngramsInter = 0
57 | ngramsInter == ngramsUnion = 1
58 | sens == 0 = jaccard ngramsInter ngramsUnion
59 | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
60 | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
62 --------------------------------------
64 ngramsInter = intersect ngrams ngrams'
65 --------------------------------------
67 ngramsUnion = union ngrams ngrams'
68 --------------------------------------
69 diagoInter :: [Double]
70 diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
71 --------------------------------------
72 diagoUnion :: [Double]
73 diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
74 --------------------------------------
77 -- | To process the proximity between a current group and a pair of targets group
78 toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
79 toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
81 WeightedLogJaccard sens ->
82 let pairNgrams = if targetNgrams == targetNgrams'
84 else union targetNgrams targetNgrams'
85 in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
89 ------------------------
90 -- | Local Matching | --
91 ------------------------
93 findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
94 findLastPeriod fil periods = case fil of
95 ToParents -> head' "findLastPeriod" (sortOn fst periods)
96 ToChilds -> last' "findLastPeriod" (sortOn fst periods)
99 -- | To filter pairs of candidates related to old pointers periods
100 removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
101 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
102 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
103 removeOldPointers oldPointers fil thr prox prd pairs
104 | null oldPointers = pairs
105 | null (filterPointers prox thr oldPointers) =
106 let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
107 in if lastMatchedPrd == prd
109 else filter (\((id,_),(id',_)) ->
111 ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
112 || (((fst . fst . fst) id') < (fst lastMatchedPrd))
113 ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
114 || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
118 makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
119 -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
120 makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
123 else removeOldPointers oldPointers fil thr prox lastPrd
124 {- at least on of the pair candidates should be from the last added period -}
125 $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
127 $ filter (\(id,ngrams) ->
128 let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
129 diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
130 in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
133 lastPrd :: PhyloPeriodId
134 lastPrd = findLastPeriod fil periods
137 filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
138 filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
140 filterPointers' :: Proximity -> Double -> [(Pointer,[Int])] -> [(Pointer,[Int])]
141 filterPointers' proxi thr pts = filter (\((_,w),_) -> filterProximity proxi thr w) pts
144 reduceDiagos :: Map Date Cooc -> Map Int Double
145 reduceDiagos diagos = mapKeys (\(k,_) -> k)
146 $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
148 filterPointersByPeriod :: Filiation -> [(Pointer,[Int])] -> [Pointer]
149 filterPointersByPeriod fil pts =
150 let pts' = sortOn (fst . fst . fst . fst) pts
151 inf = (fst . fst . fst . fst) $ head' "filterPointersByPeriod" pts'
152 sup = (fst . fst . fst . fst) $ last' "filterPointersByPeriod" pts'
154 $ nubBy (\pt pt' -> snd pt == snd pt')
155 $ filter (\pt -> ((fst . fst . fst . fst) pt == inf) || ((fst . fst . fst . fst) pt == sup))
157 ToParents -> reverse pts'
160 phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
161 -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
162 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
163 if (null $ filterPointers proxi thr oldPointers)
164 {- let's find new pointers -}
165 then if null nextPointers
167 else filterPointersByPeriod fil
168 $ head' "phyloGroupMatching"
169 -- Keep only the best set of pointers grouped by proximity
170 $ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt')
171 $ reverse $ sortOn (snd . fst) $ head' "pointers" nextPointers
172 -- Find the first time frame where at leats one pointer satisfies the proximity threshold
175 nextPointers :: [[(Pointer,[Int])]]
176 nextPointers = take 1
178 {- for each time frame, process the proximity on relevant pairs of targeted groups -}
179 $ scanl (\acc groups ->
180 let periods = nub $ map (fst . fst . fst) $ concat groups
181 nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
183 $ filterDiago diagos ([(fst . fst) id] ++ periods)
184 {- important resize nbdocs et diago dans le make pairs -}
185 pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
186 in acc ++ ( filterPointers' proxi thr
189 {- process the proximity between the current group and a pair of candidates -}
190 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
191 in if ((c == c') || (snd c == snd c'))
192 then [((fst c,proximity),snd c)]
193 else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) pairs )) []
194 $ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
197 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
198 filterDocs d pds = restrictKeys d $ periodsToYears pds
200 filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
201 filterDiago diago pds = restrictKeys diago $ periodsToYears pds
204 -----------------------------
205 -- | Matching Processing | --
206 -----------------------------
209 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
210 getNextPeriods fil max' pId pIds =
212 ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
213 ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
216 getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
217 getCandidates ego targets =
219 filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
223 matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
224 matchGroupsToGroups frame periods proximity thr docs coocs groups =
225 let groups' = groupByField _phylo_groupPeriod groups
226 in foldl' (\acc prd ->
227 let -- 1) find the parents/childs matching periods
228 periodsPar = getNextPeriods ToParents frame prd periods
229 periodsChi = getNextPeriods ToChilds frame prd periods
230 -- 2) find the parents/childs matching candidates
231 candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
232 candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
233 -- 3) find the parents/child number of docs by years
234 docsPar = filterDocs docs ([prd] ++ periodsPar)
235 docsChi = filterDocs docs ([prd] ++ periodsChi)
236 -- 4) find the parents/child diago by years
237 diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
238 diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
239 -- 5) match in parallel all the groups (egos) to their possible candidates
241 let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
242 thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
243 pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
244 thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
245 in addPointers ToChilds TemporalPointer pointersChi
246 $ addPointers ToParents TemporalPointer pointersPar ego)
247 $ findWithDefault [] prd groups'
248 egos' = egos `using` parList rdeepseq
253 -----------------------
254 -- | Phylo Quality | --
255 -----------------------
258 relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
259 relevantBranches term branches =
260 filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
262 accuracy :: Int -> [(Date,Date)] -> [PhyloGroup] -> Double
263 -- The accuracy of a branch relatively to a term x is computed only over the periods there exist some cluster mentionning x in the phylomemy
264 accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk')
265 / (fromIntegral $ length bk'))
268 bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk
270 recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
271 recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
272 / (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
274 fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double
275 fScore lambda x periods bk bx =
276 let rec = recall x bk bx
277 acc = accuracy x periods bk
278 in ((1 + lambda ** 2) * acc * rec)
279 / (((lambda ** 2) * acc + rec))
282 wk :: [PhyloGroup] -> Double
283 wk bk = fromIntegral $ length bk
286 toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
287 toPhyloQuality' lambda freq branches =
292 let bks = relevantBranches i branches
293 periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks
294 in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore lambda i periods bk bks)) bks))
297 toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
298 toRecall freq branches =
304 bx = relevantBranches x branches
305 wks = sum $ map wk bx
306 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (recall x bk bx)) bx))
310 pys = sum (elems freq)
313 toAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double
314 toAccuracy freq branches =
320 bx = relevantBranches x branches
321 -- | periods containing x
322 periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx
323 wks = sum $ map wk bx
324 in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (accuracy x periods bk)) bx))
328 pys = sum (elems freq)
331 -- | here we do the average of all the local f_scores
332 toPhyloQuality :: Double -> Double -> Map Int Double -> [[PhyloGroup]] -> Double
333 toPhyloQuality fdt lambda freq branches =
339 let bx = relevantBranches x branches
340 -- | periods containing x
341 periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx
342 wks = sum $ map wk bx
343 -- in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx))
344 -- in (1 / fdt) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x periods bk bx)) bx))
345 in (1 / fdt) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore (tan (lambda * pi / 2)) x periods bk bx)) bx))
349 -- pys = sum (elems freq)
351 -- 1 / nb de foundation
353 ------------------------------------
354 -- | Constant Temporal Matching | --
355 ------------------------------------
358 groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
359 groupsToBranches' groups =
360 {- run the related component algorithm -}
361 let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
362 $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
363 $ map (\group -> [getGroupId group]
364 ++ (map fst $ group ^. phylo_groupPeriodParents)
365 ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
366 -- first find the related components by inside each ego's period
368 graph' = map relatedComponents egos
369 -- then run it for the all the periods
371 $ relatedComponents $ concat (graph' `using` parList rdeepseq)
372 -- update each group's branch id
373 in map (\(bId,ids) ->
374 let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
375 $ elems $ restrictKeys groups (Set.fromList ids)
376 in groups' `using` parList rdeepseq ) graph
379 reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
380 reduceFrequency frequency branches =
381 restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
383 updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
384 updateThr thr branches = map (\b -> map (\g ->
385 g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
388 -- Sequentially break each branch of a phylo where
389 -- done = all the allready broken branches
390 -- ego = the current branch we want to break
391 -- rest = the branches we still have to break
392 breakBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
393 -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
394 breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
395 -- 1) keep or not the new division of ego
396 let done' = done ++ (if snd ego
398 (if ((null (fst ego')) || (quality > quality'))
400 -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
401 -- <> " | " <> show(length $ fst ego) <> " groups : "
402 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
403 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
406 -- trace (" ✓ level = " <> printf "%.1f" thr <> "")
407 -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
408 -- <> " | " <> show(length $ fst ego) <> " groups : "
409 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
410 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
411 ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
414 -- 2) if there is no more branches in rest then return else continue
417 else breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
418 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
420 --------------------------------------
422 quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
423 --------------------------------------
424 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
426 let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
427 $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
428 branches' = branches `using` parList rdeepseq
429 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
431 $ depthToMeta (elevation - depth) branches'
432 --------------------------------------
434 quality' = toPhyloQuality fdt lambda frequency
435 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
438 seaLevelMatching :: Double -> Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
439 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
440 seaLevelMatching fdt proximity lambda minBranch frequency thr step depth elevation frame periods docs coocs branches =
441 -- if there is no branch to break or if seaLvl level > 1 then end
442 if (thr >= 1) || ((not . or) $ map snd branches)
445 -- break all the possible branches at the current seaLvl level
446 let quality = toPhyloQuality fdt lambda frequency (map fst branches)
447 acc = toAccuracy frequency (map fst branches)
448 rec = toRecall frequency (map fst branches)
449 branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(λ) = " <> printf "%.5f" quality
450 <> " ξ = " <> printf "%.5f" acc
451 <> " ρ = " <> printf "%.5f" rec
452 <> " branches = " <> show(length branches) <> " ↴")
453 $ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
454 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
455 frequency' = reduceFrequency frequency (map fst branches')
456 in seaLevelMatching fdt proximity lambda minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
459 constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
460 constanteTemporalMatching start step phylo = updatePhyloGroups 1
461 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
462 (toPhyloHorizon phylo)
464 -- 2) process the temporal matching by elevating seaLvl level
465 branches :: [[PhyloGroup]]
467 $ seaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
468 (phyloProximity $ getConfig phylo)
469 (_qua_granularity $ phyloQuality $ getConfig phylo)
470 (_qua_minBranch $ phyloQuality $ getConfig phylo)
471 (phylo ^. phylo_termFreq)
473 ((((1 - start) / step) - 1))
474 (((1 - start) / step))
475 (getTimeFrame $ timeUnit $ getConfig phylo)
477 (phylo ^. phylo_timeDocs)
478 (phylo ^. phylo_timeCooc)
479 (reverse $ sortOn (length . fst) groups)
480 -- 1) for each group process an initial temporal Matching
481 -- here we suppose that all the groups of level 1 are part of the same big branch
482 groups :: [([PhyloGroup],Bool)]
483 groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
484 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
485 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
486 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
488 (phylo ^. phylo_timeDocs)
489 (phylo ^. phylo_timeCooc)
490 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
496 toPhyloHorizon :: Phylo -> Phylo
497 toPhyloHorizon phylo =
498 let t0 = take 1 (getPeriodIds phylo)
499 groups = getGroupsFromLevelPeriods 1 t0 phylo
500 sens = getSensibility (phyloProximity $ getConfig phylo)
501 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) t0
502 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) t0
503 in phylo & phylo_horizon .~ (fromList $ map (\(g,g') ->
504 ((getGroupId g,getGroupId g'),weightedLogJaccard' sens nbDocs diago (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ listToCombi' groups)
507 --------------------------------------
508 -- | Adaptative Temporal Matching | --
509 --------------------------------------
512 thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
513 thrToMeta thr branches =
515 map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
517 depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
518 depthToMeta depth branches =
519 let break = length branches > 1
522 if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
525 reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
526 reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
529 getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
531 | isJust (m !? ( k ,k')) = m ! ( k ,k')
532 | isJust (m !? ( k',k )) = m ! ( k',k )
536 toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
537 toThreshold lvl proxiGroups =
538 let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
540 then (sort $ elems proxiGroups) !! idx
544 -- done = all the allready broken branches
545 -- ego = the current branch we want to break
546 -- rest = the branches we still have to break
547 adaptativeBreakBranches :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
548 -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
549 -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
550 -> [([PhyloGroup],(Bool,[Double]))]
551 adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods done ego rest =
552 -- 1) keep or not the new division of ego
553 let done' = done ++ (if (fst . snd) ego
554 then (if ((null (fst ego')) || (quality > quality'))
556 [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
558 ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
559 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
560 else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
562 -- uncomment let .. in for debugging
563 -- let part1 = partition (snd) done'
564 -- part2 = partition (snd) rest
565 -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
566 -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
568 -- 2) if there is no more branches in rest then return else continue
571 else adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
572 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
574 --------------------------------------
576 thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
577 --------------------------------------
579 quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
580 --------------------------------------
581 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
583 let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
584 $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
585 branches' = branches `using` parList rdeepseq
586 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
588 $ depthToMeta (elevation - depth) branches'
589 --------------------------------------
591 quality' = toPhyloQuality fdt lambda frequency
592 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
595 adaptativeSeaLevelMatching :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
596 -> Double -> Int -> Map Int Double
597 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
598 -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
599 adaptativeSeaLevelMatching fdt proxiConf depth elevation groupsProxi lambda minBranch frequency frame periods docs coocs branches =
600 -- if there is no branch to break or if seaLvl level >= depth then end
601 if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
604 -- break all the possible branches at the current seaLvl level
605 let branches' = adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
606 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
607 frequency' = reduceFrequency frequency (map fst branches')
608 groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
609 -- thr = toThreshold depth groupsProxi
610 in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
611 <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
612 <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
614 $ adaptativeSeaLevelMatching fdt proxiConf (depth - 1) elevation groupsProxi' lambda minBranch frequency' frame periods docs coocs branches'
617 adaptativeTemporalMatching :: Double -> Phylo -> Phylo
618 adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
619 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
620 (toPhyloHorizon phylo)
622 -- 2) process the temporal matching by elevating seaLvl level
623 branches :: [[PhyloGroup]]
625 $ adaptativeSeaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
626 (phyloProximity $ getConfig phylo)
629 (phylo ^. phylo_groupsProxi)
630 (_qua_granularity $ phyloQuality $ getConfig phylo)
631 (_qua_minBranch $ phyloQuality $ getConfig phylo)
632 (phylo ^. phylo_termFreq)
633 (getTimeFrame $ timeUnit $ getConfig phylo)
635 (phylo ^. phylo_timeDocs)
636 (phylo ^. phylo_timeCooc)
638 -- 1) for each group process an initial temporal Matching
639 -- here we suppose that all the groups of level 1 are part of the same big branch
640 groups :: [([PhyloGroup],(Bool,[Double]))]
641 groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
642 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
643 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
644 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
646 (phylo ^. phylo_timeDocs)
647 (phylo ^. phylo_timeCooc)
648 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
649 --------------------------------------
651 thr = toThreshold elevation (phylo ^. phylo_groupsProxi)