]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/TemporalMatching.hs
Merge branch 'dev-default-extensions' into dev
[gargantext.git] / src / Gargantext / Viz / Phylo / TemporalMatching.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11
12 module Gargantext.Viz.Phylo.TemporalMatching where
13
14 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or, sort, (!!))
15 import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
16
17 import Gargantext.Prelude
18 import Gargantext.Viz.AdaptativePhylo
19 import Gargantext.Viz.Phylo.PhyloTools
20
21 import Prelude (floor)
22 import Control.Lens hiding (Level)
23 import Control.Parallel.Strategies (parList, rdeepseq, using)
24 import Debug.Trace (trace)
25
26 import qualified Data.Map as Map
27 import qualified Data.Set as Set
28
29
30 -------------------
31 -- | Proximity | --
32 -------------------
33
34
35 -- | To compute a jaccard similarity between two lists
36 jaccard :: [Int] -> [Int] -> Double
37 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
38
39
40 -- | Process the inverse sumLog
41 sumInvLog' :: Double -> Double -> [Double] -> Double
42 sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
43
44
45 -- | Process the sumLog
46 sumLog' :: Double -> Double -> [Double] -> Double
47 sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
48
49
50 weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
51 weightedLogJaccard' sens nbDocs diago ngrams ngrams'
52 | null ngramsInter = 0
53 | ngramsInter == ngramsUnion = 1
54 | sens == 0 = jaccard ngramsInter ngramsUnion
55 | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
56 | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
57 where
58 --------------------------------------
59 ngramsInter :: [Int]
60 ngramsInter = intersect ngrams ngrams'
61 --------------------------------------
62 ngramsUnion :: [Int]
63 ngramsUnion = union ngrams ngrams'
64 --------------------------------------
65 diagoInter :: [Double]
66 diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
67 --------------------------------------
68 diagoUnion :: [Double]
69 diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
70 --------------------------------------
71
72
73 -- | To process the proximity between a current group and a pair of targets group
74 toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
75 toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
76 case proximity of
77 WeightedLogJaccard sens ->
78 let pairNgrams = if targetNgrams == targetNgrams'
79 then targetNgrams
80 else union targetNgrams targetNgrams'
81 in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
82 Hamming -> undefined
83
84
85 ------------------------
86 -- | Local Matching | --
87 ------------------------
88
89 findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
90 findLastPeriod fil periods = case fil of
91 ToParents -> head' "findLastPeriod" (sortOn fst periods)
92 ToChilds -> last' "findLastPeriod" (sortOn fst periods)
93
94
95 -- | To filter pairs of candidates related to old pointers periods
96 removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
97 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
98 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
99 removeOldPointers oldPointers fil thr prox prd pairs
100 | null oldPointers = pairs
101 | null (filterPointers prox thr oldPointers) =
102 let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
103 in if lastMatchedPrd == prd
104 then []
105 else filter (\((id,_),(id',_)) ->
106 case fil of
107 ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
108 || (((fst . fst . fst) id') < (fst lastMatchedPrd))
109 ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
110 || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
111 | otherwise = []
112
113
114 makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
115 -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
116 makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
117 if (null periods)
118 then []
119 else removeOldPointers oldPointers fil thr prox lastPrd
120 -- | at least on of the pair candidates should be from the last added period
121 $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
122 $ listToKeys
123 $ filter (\(id,ngrams) ->
124 let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
125 diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
126 in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
127 ) candidates
128 where
129 lastPrd :: PhyloPeriodId
130 lastPrd = findLastPeriod fil periods
131
132
133 filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
134 filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
135
136
137 reduceDiagos :: Map Date Cooc -> Map Int Double
138 reduceDiagos diagos = mapKeys (\(k,_) -> k)
139 $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
140
141
142 phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
143 -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
144 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
145 if (null $ filterPointers proxi thr oldPointers)
146 -- | let's find new pointers
147 then if null nextPointers
148 then []
149 else head' "phyloGroupMatching"
150 -- | Keep only the best set of pointers grouped by proximity
151 $ groupBy (\pt pt' -> snd pt == snd pt')
152 $ reverse $ sortOn snd $ head' "pointers" nextPointers
153 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
154 else oldPointers
155 where
156 nextPointers :: [[Pointer]]
157 nextPointers = take 1
158 $ dropWhile (null)
159 -- | for each time frame, process the proximity on relevant pairs of targeted groups
160 $ scanl (\acc groups ->
161 let periods = nub $ map (fst . fst . fst) $ concat groups
162 nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
163 diago = reduceDiagos
164 $ filterDiago diagos ([(fst . fst) id] ++ periods)
165 -- | important resize nbdocs et diago dans le make pairs
166 pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
167 in acc ++ ( filterPointers proxi thr
168 $ concat
169 $ map (\(c,c') ->
170 -- | process the proximity between the current group and a pair of candidates
171 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
172 in if (c == c')
173 then [(fst c,proximity)]
174 else [(fst c,proximity),(fst c',proximity)] ) pairs )) []
175 $ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
176
177
178 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
179 filterDocs d pds = restrictKeys d $ periodsToYears pds
180
181 filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
182 filterDiago diago pds = restrictKeys diago $ periodsToYears pds
183
184
185 -----------------------------
186 -- | Matching Processing | --
187 -----------------------------
188
189
190 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
191 getNextPeriods fil max' pId pIds =
192 case fil of
193 ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
194 ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
195
196
197 getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
198 getCandidates ego targets =
199 map (\groups' ->
200 filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
201 ) groups') targets
202
203
204 matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
205 matchGroupsToGroups frame periods proximity thr docs coocs groups =
206 let groups' = groupByField _phylo_groupPeriod groups
207 in foldl' (\acc prd ->
208 let -- | 1) find the parents/childs matching periods
209 periodsPar = getNextPeriods ToParents frame prd periods
210 periodsChi = getNextPeriods ToChilds frame prd periods
211 -- | 2) find the parents/childs matching candidates
212 candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
213 candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
214 -- | 3) find the parents/child number of docs by years
215 docsPar = filterDocs docs ([prd] ++ periodsPar)
216 docsChi = filterDocs docs ([prd] ++ periodsChi)
217 -- | 4) find the parents/child diago by years
218 diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
219 diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
220 -- | 5) match in parallel all the groups (egos) to their possible candidates
221 egos = map (\ego ->
222 let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
223 thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
224 pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
225 thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
226 in addPointers ToChilds TemporalPointer pointersChi
227 $ addPointers ToParents TemporalPointer pointersPar ego)
228 $ findWithDefault [] prd groups'
229 egos' = egos `using` parList rdeepseq
230 in acc ++ egos'
231 ) [] periods
232
233
234 -----------------------
235 -- | Phylo Quality | --
236 -----------------------
237
238
239 relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
240 relevantBranches term branches =
241 filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
242
243 fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
244 fScore beta i bk bks =
245 let recall = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
246 / (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks))
247 accuracy = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
248 / (fromIntegral $ length bk))
249 in ((1 + beta ** 2) * accuracy * recall)
250 / (((beta ** 2) * accuracy + recall))
251
252
253 wk :: [PhyloGroup] -> Double
254 wk bk = fromIntegral $ length bk
255
256
257 toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
258 toPhyloQuality' beta freq branches =
259 if (null branches)
260 then 0
261 else sum
262 $ map (\i ->
263 let bks = relevantBranches i branches
264 in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks))
265 $ keys freq
266
267
268 ------------------------------------
269 -- | Constant Temporal Matching | --
270 ------------------------------------
271
272
273 groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
274 groupsToBranches groups =
275 -- | run the related component algorithm
276 let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
277 $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
278 $ map (\group -> [getGroupId group]
279 ++ (map fst $ group ^. phylo_groupPeriodParents)
280 ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
281 -- | first find the related components by inside each ego's period
282 -- | a supprimer
283 graph' = map relatedComponents egos
284 -- | then run it for the all the periods
285 graph = zip [1..]
286 $ relatedComponents $ concat (graph' `using` parList rdeepseq)
287 -- | update each group's branch id
288 in map (\(bId,ids) ->
289 let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
290 $ elems $ restrictKeys groups (Set.fromList ids)
291 in groups' `using` parList rdeepseq ) graph
292
293
294 reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
295 reduceFrequency frequency branches =
296 restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
297
298 updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
299 updateThr thr branches = map (\b -> map (\g ->
300 g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
301
302
303 -- | Sequentially break each branch of a phylo where
304 -- done = all the allready broken branches
305 -- ego = the current branch we want to break
306 -- rest = the branches we still have to break
307 breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
308 -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
309 breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
310 -- | 1) keep or not the new division of ego
311 let done' = done ++ (if snd ego
312 then
313 (if ((null (fst ego')) || (quality > quality'))
314 then
315 -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
316 -- <> " | " <> show(length $ fst ego) <> " groups : "
317 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
318 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
319 [(fst ego,False)]
320 else
321 -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
322 -- <> " | " <> show(length $ fst ego) <> " groups : "
323 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
324 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
325 ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
326 else [ego])
327 in
328 -- | 2) if there is no more branches in rest then return else continue
329 if null rest
330 then done'
331 else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
332 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
333 where
334 --------------------------------------
335 quality :: Double
336 quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
337 --------------------------------------
338 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
339 ego' =
340 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
341 $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
342 branches' = branches `using` parList rdeepseq
343 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
344 $ thrToMeta thr
345 $ depthToMeta (elevation - depth) branches'
346 --------------------------------------
347 quality' :: Double
348 quality' = toPhyloQuality' beta frequency
349 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
350
351
352 seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
353 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
354 seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
355 -- | if there is no branch to break or if seaLvl level > 1 then end
356 if (thr >= 1) || ((not . or) $ map snd branches)
357 then branches
358 else
359 -- | break all the possible branches at the current seaLvl level
360 let branches' = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
361 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
362 frequency' = reduceFrequency frequency (map fst branches')
363 in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
364
365
366 constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
367 constanteTemporalMatching start step phylo = updatePhyloGroups 1
368 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
369 phylo
370 where
371 -- | 2) process the temporal matching by elevating seaLvl level
372 branches :: [[PhyloGroup]]
373 branches = map fst
374 $ seaLevelMatching (phyloProximity $ getConfig phylo)
375 (_qua_granularity $ phyloQuality $ getConfig phylo)
376 (_qua_minBranch $ phyloQuality $ getConfig phylo)
377 (phylo ^. phylo_termFreq)
378 start step
379 ((((1 - start) / step) - 1))
380 (((1 - start) / step))
381 (getTimeFrame $ timeUnit $ getConfig phylo)
382 (getPeriodIds phylo)
383 (phylo ^. phylo_timeDocs)
384 (phylo ^. phylo_timeCooc)
385 groups
386 -- | 1) for each group process an initial temporal Matching
387 -- | here we suppose that all the groups of level 1 are part of the same big branch
388 groups :: [([PhyloGroup],Bool)]
389 groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
390 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
391 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
392 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
393 start
394 (phylo ^. phylo_timeDocs)
395 (phylo ^. phylo_timeCooc)
396 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
397
398
399 --------------------------------------
400 -- | Adaptative Temporal Matching | --
401 --------------------------------------
402
403
404 thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
405 thrToMeta thr branches =
406 map (\b ->
407 map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
408
409 depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
410 depthToMeta depth branches =
411 let break = length branches > 1
412 in map (\b ->
413 map (\g ->
414 if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
415 else g) b) branches
416
417 reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
418 reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
419
420
421 getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
422 getInTupleMap m k k'
423 | isJust (m !? ( k ,k')) = m ! ( k ,k')
424 | isJust (m !? ( k',k )) = m ! ( k',k )
425 | otherwise = 0
426
427
428 toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
429 toThreshold lvl proxiGroups =
430 let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
431 in if idx >= 0
432 then (sort $ elems proxiGroups) !! idx
433 else 1
434
435
436 -- done = all the allready broken branches
437 -- ego = the current branch we want to break
438 -- rest = the branches we still have to break
439 adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
440 -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
441 -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
442 -> [([PhyloGroup],(Bool,[Double]))]
443 adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
444 -- | 1) keep or not the new division of ego
445 let done' = done ++ (if (fst . snd) ego
446 then (if ((null (fst ego')) || (quality > quality'))
447 then
448 [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
449 else
450 ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
451 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
452 else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
453 in
454 -- | uncomment let .. in for debugging
455 -- let part1 = partition (snd) done'
456 -- part2 = partition (snd) rest
457 -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
458 -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
459 -- ) $
460 -- | 2) if there is no more branches in rest then return else continue
461 if null rest
462 then done'
463 else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
464 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
465 where
466 --------------------------------------
467 thr :: Double
468 thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
469 --------------------------------------
470 quality :: Double
471 quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
472 --------------------------------------
473 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
474 ego' =
475 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
476 $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
477 branches' = branches `using` parList rdeepseq
478 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
479 $ thrToMeta thr
480 $ depthToMeta (elevation - depth) branches'
481 --------------------------------------
482 quality' :: Double
483 quality' = toPhyloQuality' beta frequency
484 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
485
486
487 adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
488 -> Double -> Int -> Map Int Double
489 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
490 -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
491 adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
492 -- | if there is no branch to break or if seaLvl level >= depth then end
493 if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
494 then branches
495 else
496 -- | break all the possible branches at the current seaLvl level
497 let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
498 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
499 frequency' = reduceFrequency frequency (map fst branches')
500 groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
501 -- thr = toThreshold depth groupsProxi
502 in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
503 <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
504 <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
505 <> " thr = ")
506 $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
507
508
509 adaptativeTemporalMatching :: Double -> Phylo -> Phylo
510 adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
511 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
512 phylo
513 where
514 -- | 2) process the temporal matching by elevating seaLvl level
515 branches :: [[PhyloGroup]]
516 branches = map fst
517 $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
518 (elevation - 1)
519 elevation
520 (phylo ^. phylo_groupsProxi)
521 (_qua_granularity $ phyloQuality $ getConfig phylo)
522 (_qua_minBranch $ phyloQuality $ getConfig phylo)
523 (phylo ^. phylo_termFreq)
524 (getTimeFrame $ timeUnit $ getConfig phylo)
525 (getPeriodIds phylo)
526 (phylo ^. phylo_timeDocs)
527 (phylo ^. phylo_timeCooc)
528 groups
529 -- | 1) for each group process an initial temporal Matching
530 -- | here we suppose that all the groups of level 1 are part of the same big branch
531 groups :: [([PhyloGroup],(Bool,[Double]))]
532 groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
533 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
534 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
535 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
536 thr
537 (phylo ^. phylo_timeDocs)
538 (phylo ^. phylo_timeCooc)
539 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
540 --------------------------------------
541 thr :: Double
542 thr = toThreshold elevation (phylo ^. phylo_groupsProxi)