]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/TemporalMatching.hs
update shell.nix nixpkgs
[gargantext.git] / src / Gargantext / Viz / Phylo / TemporalMatching.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.TemporalMatching
3 Description : Module dedicated to the adaptative temporal matching of a Phylo.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15
16 module Gargantext.Viz.Phylo.TemporalMatching where
17
18 import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, union, dropWhile, partition, or, sort, (!!))
19 import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
20
21 import Gargantext.Prelude
22 import Gargantext.Viz.AdaptativePhylo
23 import Gargantext.Viz.Phylo.PhyloTools
24
25 import Prelude (floor)
26 import Control.Lens hiding (Level)
27 import Control.Parallel.Strategies (parList, rdeepseq, using)
28 import Debug.Trace (trace)
29
30 import qualified Data.Map as Map
31 import qualified Data.Set as Set
32
33
34 -------------------
35 -- | Proximity | --
36 -------------------
37
38
39 -- | To compute a jaccard similarity between two lists
40 jaccard :: [Int] -> [Int] -> Double
41 jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
42
43
44 -- | Process the inverse sumLog
45 sumInvLog' :: Double -> Double -> [Double] -> Double
46 sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
47
48
49 -- | Process the sumLog
50 sumLog' :: Double -> Double -> [Double] -> Double
51 sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
52
53
54 weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
55 weightedLogJaccard' sens nbDocs diago ngrams ngrams'
56 | null ngramsInter = 0
57 | ngramsInter == ngramsUnion = 1
58 | sens == 0 = jaccard ngramsInter ngramsUnion
59 | sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
60 | otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
61 where
62 --------------------------------------
63 ngramsInter :: [Int]
64 ngramsInter = intersect ngrams ngrams'
65 --------------------------------------
66 ngramsUnion :: [Int]
67 ngramsUnion = union ngrams ngrams'
68 --------------------------------------
69 diagoInter :: [Double]
70 diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
71 --------------------------------------
72 diagoUnion :: [Double]
73 diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
74 --------------------------------------
75
76
77 -- | To process the proximity between a current group and a pair of targets group
78 toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
79 toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
80 case proximity of
81 WeightedLogJaccard sens ->
82 let pairNgrams = if targetNgrams == targetNgrams'
83 then targetNgrams
84 else union targetNgrams targetNgrams'
85 in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
86 Hamming -> undefined
87
88
89 ------------------------
90 -- | Local Matching | --
91 ------------------------
92
93 findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
94 findLastPeriod fil periods = case fil of
95 ToParents -> head' "findLastPeriod" (sortOn fst periods)
96 ToChilds -> last' "findLastPeriod" (sortOn fst periods)
97
98
99 -- | To filter pairs of candidates related to old pointers periods
100 removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
101 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
102 -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
103 removeOldPointers oldPointers fil thr prox prd pairs
104 | null oldPointers = pairs
105 | null (filterPointers prox thr oldPointers) =
106 let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
107 in if lastMatchedPrd == prd
108 then []
109 else filter (\((id,_),(id',_)) ->
110 case fil of
111 ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
112 || (((fst . fst . fst) id') < (fst lastMatchedPrd))
113 ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
114 || (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
115 | otherwise = []
116
117
118 makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
119 -> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
120 makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
121 if (null periods)
122 then []
123 else removeOldPointers oldPointers fil thr prox lastPrd
124 -- | at least on of the pair candidates should be from the last added period
125 $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
126 $ listToKeys
127 $ filter (\(id,ngrams) ->
128 let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
129 diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
130 in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
131 ) candidates
132 where
133 lastPrd :: PhyloPeriodId
134 lastPrd = findLastPeriod fil periods
135
136
137 filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
138 filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
139
140
141 reduceDiagos :: Map Date Cooc -> Map Int Double
142 reduceDiagos diagos = mapKeys (\(k,_) -> k)
143 $ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
144
145
146 phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
147 -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
148 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
149 if (null $ filterPointers proxi thr oldPointers)
150 -- | let's find new pointers
151 then if null nextPointers
152 then []
153 else head' "phyloGroupMatching"
154 -- | Keep only the best set of pointers grouped by proximity
155 $ groupBy (\pt pt' -> snd pt == snd pt')
156 $ reverse $ sortOn snd $ head' "pointers" nextPointers
157 -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
158 else oldPointers
159 where
160 nextPointers :: [[Pointer]]
161 nextPointers = take 1
162 $ dropWhile (null)
163 -- | for each time frame, process the proximity on relevant pairs of targeted groups
164 $ scanl (\acc groups ->
165 let periods = nub $ map (fst . fst . fst) $ concat groups
166 nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
167 diago = reduceDiagos
168 $ filterDiago diagos ([(fst . fst) id] ++ periods)
169 -- | important resize nbdocs et diago dans le make pairs
170 pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
171 in acc ++ ( filterPointers proxi thr
172 $ concat
173 $ map (\(c,c') ->
174 -- | process the proximity between the current group and a pair of candidates
175 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
176 in if (c == c')
177 then [(fst c,proximity)]
178 else [(fst c,proximity),(fst c',proximity)] ) pairs )) []
179 $ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
180
181
182 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
183 filterDocs d pds = restrictKeys d $ periodsToYears pds
184
185 filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
186 filterDiago diago pds = restrictKeys diago $ periodsToYears pds
187
188
189 -----------------------------
190 -- | Matching Processing | --
191 -----------------------------
192
193
194 getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
195 getNextPeriods fil max' pId pIds =
196 case fil of
197 ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
198 ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
199
200
201 getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
202 getCandidates ego targets =
203 map (\groups' ->
204 filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
205 ) groups') targets
206
207
208 matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
209 matchGroupsToGroups frame periods proximity thr docs coocs groups =
210 let groups' = groupByField _phylo_groupPeriod groups
211 in foldl' (\acc prd ->
212 let -- | 1) find the parents/childs matching periods
213 periodsPar = getNextPeriods ToParents frame prd periods
214 periodsChi = getNextPeriods ToChilds frame prd periods
215 -- | 2) find the parents/childs matching candidates
216 candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
217 candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
218 -- | 3) find the parents/child number of docs by years
219 docsPar = filterDocs docs ([prd] ++ periodsPar)
220 docsChi = filterDocs docs ([prd] ++ periodsChi)
221 -- | 4) find the parents/child diago by years
222 diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
223 diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
224 -- | 5) match in parallel all the groups (egos) to their possible candidates
225 egos = map (\ego ->
226 let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
227 thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
228 pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
229 thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
230 in addPointers ToChilds TemporalPointer pointersChi
231 $ addPointers ToParents TemporalPointer pointersPar ego)
232 $ findWithDefault [] prd groups'
233 egos' = egos `using` parList rdeepseq
234 in acc ++ egos'
235 ) [] periods
236
237
238 -----------------------
239 -- | Phylo Quality | --
240 -----------------------
241
242
243 relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
244 relevantBranches term branches =
245 filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
246
247 fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
248 fScore beta i bk bks =
249 let recall = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
250 / (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks))
251 accuracy = ( (fromIntegral $ length $ filter (\g -> elem i $ g ^. phylo_groupNgrams) bk)
252 / (fromIntegral $ length bk))
253 in ((1 + beta ** 2) * accuracy * recall)
254 / (((beta ** 2) * accuracy + recall))
255
256
257 wk :: [PhyloGroup] -> Double
258 wk bk = fromIntegral $ length bk
259
260
261 toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
262 toPhyloQuality' beta freq branches =
263 if (null branches)
264 then 0
265 else sum
266 $ map (\i ->
267 let bks = relevantBranches i branches
268 in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks))
269 $ keys freq
270
271
272 ------------------------------------
273 -- | Constant Temporal Matching | --
274 ------------------------------------
275
276
277 groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
278 groupsToBranches groups =
279 -- | run the related component algorithm
280 let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
281 $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
282 $ map (\group -> [getGroupId group]
283 ++ (map fst $ group ^. phylo_groupPeriodParents)
284 ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
285 -- | first find the related components by inside each ego's period
286 -- | a supprimer
287 graph' = map relatedComponents egos
288 -- | then run it for the all the periods
289 graph = zip [1..]
290 $ relatedComponents $ concat (graph' `using` parList rdeepseq)
291 -- | update each group's branch id
292 in map (\(bId,ids) ->
293 let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
294 $ elems $ restrictKeys groups (Set.fromList ids)
295 in groups' `using` parList rdeepseq ) graph
296
297
298 reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
299 reduceFrequency frequency branches =
300 restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
301
302 updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
303 updateThr thr branches = map (\b -> map (\g ->
304 g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
305
306
307 -- | Sequentially break each branch of a phylo where
308 -- done = all the allready broken branches
309 -- ego = the current branch we want to break
310 -- rest = the branches we still have to break
311 breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
312 -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
313 breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
314 -- | 1) keep or not the new division of ego
315 let done' = done ++ (if snd ego
316 then
317 (if ((null (fst ego')) || (quality > quality'))
318 then
319 -- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
320 -- <> " | " <> show(length $ fst ego) <> " groups : "
321 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
322 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
323 [(fst ego,False)]
324 else
325 -- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
326 -- <> " | " <> show(length $ fst ego) <> " groups : "
327 -- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
328 -- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
329 ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
330 else [ego])
331 in
332 -- | 2) if there is no more branches in rest then return else continue
333 if null rest
334 then done'
335 else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
336 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
337 where
338 --------------------------------------
339 quality :: Double
340 quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
341 --------------------------------------
342 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
343 ego' =
344 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
345 $ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
346 branches' = branches `using` parList rdeepseq
347 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
348 $ thrToMeta thr
349 $ depthToMeta (elevation - depth) branches'
350 --------------------------------------
351 quality' :: Double
352 quality' = toPhyloQuality' beta frequency
353 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
354
355
356 seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
357 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
358 seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
359 -- | if there is no branch to break or if seaLvl level > 1 then end
360 if (thr >= 1) || ((not . or) $ map snd branches)
361 then branches
362 else
363 -- | break all the possible branches at the current seaLvl level
364 let branches' = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
365 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
366 frequency' = reduceFrequency frequency (map fst branches')
367 in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
368
369
370 constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
371 constanteTemporalMatching start step phylo = updatePhyloGroups 1
372 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
373 phylo
374 where
375 -- | 2) process the temporal matching by elevating seaLvl level
376 branches :: [[PhyloGroup]]
377 branches = map fst
378 $ seaLevelMatching (phyloProximity $ getConfig phylo)
379 (_qua_granularity $ phyloQuality $ getConfig phylo)
380 (_qua_minBranch $ phyloQuality $ getConfig phylo)
381 (phylo ^. phylo_termFreq)
382 start step
383 ((((1 - start) / step) - 1))
384 (((1 - start) / step))
385 (getTimeFrame $ timeUnit $ getConfig phylo)
386 (getPeriodIds phylo)
387 (phylo ^. phylo_timeDocs)
388 (phylo ^. phylo_timeCooc)
389 groups
390 -- | 1) for each group process an initial temporal Matching
391 -- | here we suppose that all the groups of level 1 are part of the same big branch
392 groups :: [([PhyloGroup],Bool)]
393 groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
394 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
395 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
396 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
397 start
398 (phylo ^. phylo_timeDocs)
399 (phylo ^. phylo_timeCooc)
400 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
401
402
403 --------------------------------------
404 -- | Adaptative Temporal Matching | --
405 --------------------------------------
406
407
408 thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
409 thrToMeta thr branches =
410 map (\b ->
411 map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
412
413 depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
414 depthToMeta depth branches =
415 let break = length branches > 1
416 in map (\b ->
417 map (\g ->
418 if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
419 else g) b) branches
420
421 reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
422 reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
423
424
425 getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
426 getInTupleMap m k k'
427 | isJust (m !? ( k ,k')) = m ! ( k ,k')
428 | isJust (m !? ( k',k )) = m ! ( k',k )
429 | otherwise = 0
430
431
432 toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
433 toThreshold lvl proxiGroups =
434 let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
435 in if idx >= 0
436 then (sort $ elems proxiGroups) !! idx
437 else 1
438
439
440 -- done = all the allready broken branches
441 -- ego = the current branch we want to break
442 -- rest = the branches we still have to break
443 adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
444 -> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
445 -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
446 -> [([PhyloGroup],(Bool,[Double]))]
447 adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
448 -- | 1) keep or not the new division of ego
449 let done' = done ++ (if (fst . snd) ego
450 then (if ((null (fst ego')) || (quality > quality'))
451 then
452 [(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
453 else
454 ( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
455 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
456 else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
457 in
458 -- | uncomment let .. in for debugging
459 -- let part1 = partition (snd) done'
460 -- part2 = partition (snd) rest
461 -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
462 -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
463 -- ) $
464 -- | 2) if there is no more branches in rest then return else continue
465 if null rest
466 then done'
467 else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
468 done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
469 where
470 --------------------------------------
471 thr :: Double
472 thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
473 --------------------------------------
474 quality :: Double
475 quality = toPhyloQuality' beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
476 --------------------------------------
477 ego' :: ([[PhyloGroup]],[[PhyloGroup]])
478 ego' =
479 let branches = groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
480 $ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
481 branches' = branches `using` parList rdeepseq
482 in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
483 $ thrToMeta thr
484 $ depthToMeta (elevation - depth) branches'
485 --------------------------------------
486 quality' :: Double
487 quality' = toPhyloQuality' beta frequency
488 ((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
489
490
491 adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
492 -> Double -> Int -> Map Int Double
493 -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
494 -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
495 adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
496 -- | if there is no branch to break or if seaLvl level >= depth then end
497 if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
498 then branches
499 else
500 -- | break all the possible branches at the current seaLvl level
501 let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
502 [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
503 frequency' = reduceFrequency frequency (map fst branches')
504 groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
505 -- thr = toThreshold depth groupsProxi
506 in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
507 <> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
508 <> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
509 <> " thr = ")
510 $ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
511
512
513 adaptativeTemporalMatching :: Double -> Phylo -> Phylo
514 adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
515 (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
516 phylo
517 where
518 -- | 2) process the temporal matching by elevating seaLvl level
519 branches :: [[PhyloGroup]]
520 branches = map fst
521 $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
522 (elevation - 1)
523 elevation
524 (phylo ^. phylo_groupsProxi)
525 (_qua_granularity $ phyloQuality $ getConfig phylo)
526 (_qua_minBranch $ phyloQuality $ getConfig phylo)
527 (phylo ^. phylo_termFreq)
528 (getTimeFrame $ timeUnit $ getConfig phylo)
529 (getPeriodIds phylo)
530 (phylo ^. phylo_timeDocs)
531 (phylo ^. phylo_timeCooc)
532 groups
533 -- | 1) for each group process an initial temporal Matching
534 -- | here we suppose that all the groups of level 1 are part of the same big branch
535 groups :: [([PhyloGroup],(Bool,[Double]))]
536 groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
537 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
538 $ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
539 (getPeriodIds phylo) (phyloProximity $ getConfig phylo)
540 thr
541 (phylo ^. phylo_timeDocs)
542 (phylo ^. phylo_timeCooc)
543 (traceTemporalMatching $ getGroupsFromLevel 1 phylo)
544 --------------------------------------
545 thr :: Double
546 thr = toThreshold elevation (phylo ^. phylo_groupsProxi)