2 Module : Gargantext.Core.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE ViewPatterns #-}
13 module Gargantext.Core.Viz.Phylo.PhyloTools where
15 import Control.Lens hiding (Level)
16 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, iterate, transpose, partition, tails, nubBy, group, notElem, (!!))
17 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
18 import Data.Set (Set, disjoint)
19 import Data.String (String)
20 import Data.Text (Text,unpack)
21 import Data.Vector (Vector, elemIndex)
22 import Debug.Trace (trace)
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Prelude
25 import Prelude (floor,read)
27 import qualified Data.List as List
28 import qualified Data.Set as Set
29 import qualified Data.Text as Text
30 import qualified Data.Vector as Vector
31 import qualified Data.Map as Map
37 -- | To print an important message as an IO()
38 printIOMsg :: String -> IO ()
43 <> "-- | " <> msg <> "\n" )
46 -- | To print a comment as an IO()
47 printIOComment :: String -> IO ()
49 putStrLn ( "\n" <> cmt <> "\n" )
56 -- truncate' :: Double -> Int -> Double
57 -- truncate' x n = (fromIntegral (floor (x * t))) / t
60 truncate' :: Double -> Int -> Double
61 truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
67 getInMap :: Int -> Map Int Double -> Double
73 roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
74 roundToStr = printf "%0.*f"
77 countSup :: Double -> [Double] -> Int
78 countSup s l = length $ filter (>s) l
81 dropByIdx :: Int -> [a] -> [a]
82 dropByIdx k l = take k l ++ drop (k+1) l
85 elemIndex' :: Eq a => a -> [a] -> Int
86 elemIndex' e l = case (List.elemIndex e l) of
87 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
91 commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
92 commonPrefix lst lst' acc =
93 if (null lst || null lst')
95 else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
96 then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
100 ---------------------
101 -- | Foundations | --
102 ---------------------
105 -- | Is this Ngrams a Foundations Root ?
106 isRoots :: Ngrams -> Vector Ngrams -> Bool
107 isRoots n ns = Vector.elem n ns
109 -- | To transform a list of nrams into a list of foundation's index
110 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
111 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
113 -- | To transform a list of sources into a list of sources' index
114 sourcesToIdx :: [Text] -> Vector Text -> [Int]
115 sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
117 -- | To transform a list of Ngrams Indexes into a Label
118 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
119 ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
121 idxToLabel :: [Int] -> String
122 idxToLabel l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
124 idxToLabel' :: [Double] -> String
125 idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
127 -- | To transform a list of Ngrams Indexes into a list of Text
128 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
129 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
136 -- | To transform a list of periods into a set of Dates
137 periodsToYears :: [(Date,Date)] -> Set Date
138 periodsToYears periods = (Set.fromList . sort . concat)
139 $ map (\(d,d') -> [d..d']) periods
142 findBounds :: [Date] -> (Date,Date)
144 let dates' = sort dates
145 in (head' "findBounds" dates', last' "findBounds" dates')
148 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
149 toPeriods dates p s =
150 let (start,end) = findBounds dates
151 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
152 $ chunkAlong p s [start .. end]
155 toFstDate :: [Text] -> Text
160 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
163 toLstDate :: [Text] -> Text
169 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
173 getTimeScale :: Phylo -> [Char]
174 getTimeScale p = case (timeUnit $ getConfig p) of
182 -- | Get a regular & ascendante timeScale from a given list of dates
183 toTimeScale :: [Date] -> Int -> [Date]
184 toTimeScale dates step =
185 let (start,end) = findBounds dates
186 in [start, (start + step) .. end]
189 getTimeStep :: TimeUnit -> Int
190 getTimeStep time = case time of
191 Epoch { .. } -> _epoch_step
192 Year { .. } -> _year_step
193 Month { .. } -> _month_step
194 Week { .. } -> _week_step
195 Day { .. } -> _day_step
197 getTimePeriod :: TimeUnit -> Int
198 getTimePeriod time = case time of
199 Epoch { .. } -> _epoch_period
200 Year { .. } -> _year_period
201 Month { .. } -> _month_period
202 Week { .. } -> _week_period
203 Day { .. } -> _day_period
205 getTimeFrame :: TimeUnit -> Int
206 getTimeFrame time = case time of
207 Epoch { .. } -> _epoch_matchingFrame
208 Year { .. } -> _year_matchingFrame
209 Month { .. } -> _month_matchingFrame
210 Week { .. } -> _week_matchingFrame
211 Day { .. } -> _day_matchingFrame
218 -- | To find if l' is nested in l
219 isNested :: Eq a => [a] -> [a] -> Bool
222 | length l' > length l = False
223 | union l l' == l = True
227 -- | To filter Fis with small Support but by keeping non empty Periods
228 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
229 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
230 then keepFilled f (thr - 1) l
234 traceClique :: Map (Date, Date) [Clustering] -> String
235 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
237 --------------------------------------
239 cliques = sort $ map (fromIntegral . length . _clustering_roots) $ concat $ elems mFis
240 --------------------------------------
243 traceSupport :: Map (Date, Date) [Clustering] -> String
244 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
246 --------------------------------------
248 supports = sort $ map (fromIntegral . _clustering_support) $ concat $ elems mFis
249 --------------------------------------
252 traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
253 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
254 <> "Support : " <> traceSupport mFis <> "\n"
255 <> "Nb Ngrams : " <> traceClique mFis <> "\n" ) mFis
263 getCliqueSupport :: Cluster -> Int
264 getCliqueSupport unit = case unit of
268 getCliqueSize :: Cluster -> Int
269 getCliqueSize unit = case unit of
278 listToCombi' :: [a] -> [(a,a)]
279 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
281 listToEqual' :: Eq a => [a] -> [(a,a)]
282 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
284 listToKeys :: Eq a => [a] -> [(a,a)]
285 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
287 listToMatrix :: [Int] -> Map (Int,Int) Double
288 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
290 listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
291 listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
293 listToSeq :: Eq a => [a] -> [(a,a)]
294 listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
296 sumCooc :: Cooc -> Cooc -> Cooc
297 sumCooc cooc cooc' = unionWith (+) cooc cooc'
299 getTrace :: Cooc -> Double
300 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
302 coocToDiago :: Cooc -> Cooc
303 coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
305 coocToAdjacency :: Cooc -> Cooc
306 coocToAdjacency cooc = Map.map (\_ -> 1) cooc
308 -- | To build the local cooc matrix of each phylogroup
309 ngramsToCooc :: [Int] -> [Cooc] -> Cooc
310 ngramsToCooc ngrams coocs =
311 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
312 pairs = listToKeys ngrams
313 in filterWithKey (\k _ -> elem k pairs) cooc
320 -- | find the local maxima in a list of values
321 findMaxima :: [(Double,Double)] -> [Bool]
322 findMaxima lst = map (hasMax) $ toChunk 3 lst
325 hasMax :: [(Double,Double)] -> Bool
327 if (length chunk) /= 3
329 else (snd(chunk !! 0) < snd(chunk !! 1)) && (snd(chunk !! 2) < snd(chunk !! 1))
332 -- | split a list into chunks of size n
333 toChunk :: Int -> [a] -> [[a]]
334 toChunk n = takeWhile ((== n) . length) . transpose . take n . iterate tail
337 -- | To compute the average degree from a cooc matrix
338 -- http://networksciencebook.com/chapter/2#degree
339 toAverageDegree :: Cooc -> Vector Ngrams -> Double
340 toAverageDegree cooc roots = 2 * (fromIntegral $ Map.size cooc) / (fromIntegral $ Vector.length roots)
343 -- | Use the giant component regime to estimate the default level
344 -- http://networksciencebook.com/chapter/3#networks-supercritical
345 regimeToDefaultLevel :: Cooc -> Vector Ngrams -> Double
346 regimeToDefaultLevel cooc roots
348 | avg < 1 = avg * 0.6
350 | avg < lnN = (avg * 0.2) / lnN
354 avg = toAverageDegree cooc roots
356 lnN = log (fromIntegral $ Vector.length roots)
358 coocToConfidence :: Phylo -> Cooc
359 coocToConfidence phylo =
360 let count = getRootsCount phylo
361 cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty
362 $ elems $ getCoocByDate phylo
363 in Map.mapWithKey (\(a,b) w -> confidence a b w count) cooc
367 confidence :: Int -> Int -> Double -> Map Int Double -> Double
368 confidence a b inter card = maximum [(inter / card ! a),(inter / card ! b)]
371 sumtest :: [Int] -> [Int] -> Int
372 sumtest l1 l2 = (head' "test" l1) + (head' "test" $ reverse l2)
375 findDefaultLevel :: Phylo -> Phylo
376 findDefaultLevel phylo =
377 let confidence = Map.filterWithKey (\(a,b) _ -> a /= b)
378 $ Map.filter (> 0.01)
379 $ coocToConfidence phylo
380 roots = getRoots phylo
381 level = regimeToDefaultLevel confidence roots
382 in updateLevel level phylo
389 getGroupId :: PhyloGroup -> PhyloGroupId
390 getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupScale), g ^. phylo_groupIndex)
392 getGroupNgrams :: PhyloGroup -> [Int]
393 getGroupNgrams g = g ^. phylo_groupNgrams
395 idToPrd :: PhyloGroupId -> Period
396 idToPrd id = (fst . fst) id
398 groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
399 groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
401 getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
402 getPeriodPointers fil g =
404 ToChilds -> g ^. phylo_groupPeriodChilds
405 ToParents -> g ^. phylo_groupPeriodParents
406 ToChildsMemory -> undefined
407 ToParentsMemory -> undefined
409 filterSimilarity :: PhyloSimilarity -> Double -> Double -> Bool
410 filterSimilarity similarity thr local =
412 WeightedLogJaccard _ _ -> local >= thr
413 WeightedLogSim _ _ -> local >= thr
414 Hamming _ _ -> undefined
416 getSimilarityName :: PhyloSimilarity -> String
417 getSimilarityName similarity =
419 WeightedLogJaccard _ _ -> "WLJaccard"
420 WeightedLogSim _ _ -> "WeightedLogSim"
421 Hamming _ _ -> "Hamming"
427 addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
428 addPointers fil pty pointers g =
430 TemporalPointer -> case fil of
431 ToChilds -> g & phylo_groupPeriodChilds .~ pointers
432 ToParents -> g & phylo_groupPeriodParents .~ pointers
433 ToChildsMemory -> undefined
434 ToParentsMemory -> undefined
435 ScalePointer -> case fil of
436 ToChilds -> g & phylo_groupScaleChilds .~ pointers
437 ToParents -> g & phylo_groupScaleParents .~ pointers
438 ToChildsMemory -> undefined
439 ToParentsMemory -> undefined
441 toPointer' :: Double -> Pointer -> Pointer'
442 toPointer' thr pt = (fst pt,(thr,snd pt))
445 addMemoryPointers :: Filiation -> PointerType -> Double -> [Pointer] -> PhyloGroup -> PhyloGroup
446 addMemoryPointers fil pty thr pointers g =
448 TemporalPointer -> case fil of
449 ToChilds -> undefined
450 ToParents -> undefined
451 ToChildsMemory -> g & phylo_groupPeriodMemoryChilds .~ (concat [(g ^. phylo_groupPeriodMemoryChilds),(map (\pt -> toPointer' thr pt) pointers)])
452 ToParentsMemory -> g & phylo_groupPeriodMemoryParents .~ (concat [(g ^. phylo_groupPeriodMemoryParents),(map (\pt -> toPointer' thr pt) pointers)])
453 ScalePointer -> undefined
456 getPeriodIds :: Phylo -> [(Date,Date)]
457 getPeriodIds phylo = sortOn fst
459 $ phylo ^. phylo_periods
461 getLevelParentId :: PhyloGroup -> PhyloGroupId
462 getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
464 getLastLevel :: Phylo -> Scale
465 getLastLevel phylo = last' "lastLevel" $ getScales phylo
467 getScales :: Phylo -> [Scale]
468 getScales phylo = nub
470 $ keys $ view ( phylo_periods
472 . phylo_periodScales ) phylo
474 getSeaElevation :: Phylo -> SeaElevation
475 getSeaElevation phylo = seaElevation (getConfig phylo)
477 getSimilarity :: Phylo -> PhyloSimilarity
478 getSimilarity phylo = similarity (getConfig phylo)
481 getPhyloSeaRiseStart :: Phylo -> Double
482 getPhyloSeaRiseStart phylo = case (getSeaElevation phylo) of
487 getPhyloSeaRiseSteps :: Phylo -> Double
488 getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of
494 getConfig :: Phylo -> PhyloConfig
495 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
497 getLevel :: Phylo -> Double
498 getLevel phylo = _phylo_level phylo
500 getLadder :: Phylo -> [Double]
501 getLadder phylo = phylo ^. phylo_seaLadder
503 getCoocByDate :: Phylo -> Map Date Cooc
504 getCoocByDate phylo = coocByDate (phylo ^. phylo_counts)
506 getDocsByDate :: Phylo -> Map Date Double
507 getDocsByDate phylo = docsByDate (phylo ^. phylo_counts)
509 getRootsCount :: Phylo -> Map Int Double
510 getRootsCount phylo = rootsCount (phylo ^. phylo_counts)
512 getRootsFreq :: Phylo -> Map Int Double
513 getRootsFreq phylo = rootsFreq (phylo ^. phylo_counts)
515 getLastRootsFreq :: Phylo -> Map Int Double
516 getLastRootsFreq phylo = lastRootsFreq (phylo ^. phylo_counts)
518 setConfig :: PhyloConfig -> Phylo -> Phylo
519 setConfig config phylo = phylo
520 & phylo_param .~ (PhyloParam
521 ((phylo ^. phylo_param) ^. phyloParam_version)
522 ((phylo ^. phylo_param) ^. phyloParam_software)
525 -- & phylo_param & phyloParam_config & phyloParam_config .~ config
528 getRoots :: Phylo -> Vector Ngrams
529 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
531 getRootsInGroups :: Phylo -> Map Int [PhyloGroupId]
532 getRootsInGroups phylo = (phylo ^. phylo_foundations) ^. foundations_rootsInGroups
534 getSources :: Phylo -> Vector Text
535 getSources phylo = _sources (phylo ^. phylo_sources)
538 -- get the groups distributed by branches at the last scale
539 phyloLastScale :: Phylo -> [[PhyloGroup]]
540 phyloLastScale phylo = elems
542 $ map (\g -> (g ^. phylo_groupBranchId, [g]))
543 $ getGroupsFromScale (last' "byBranches" $ getScales phylo) phylo
545 getGroupsFromScale :: Scale -> Phylo -> [PhyloGroup]
546 getGroupsFromScale lvl phylo =
547 elems $ view ( phylo_periods
551 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
552 . phylo_scaleGroups ) phylo
555 getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
556 getGroupsFromScalePeriods lvl periods phylo =
557 elems $ view ( phylo_periods
559 . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
562 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
563 . phylo_scaleGroups ) phylo
566 getGroupsFromPeriods :: Scale -> Map Period PhyloPeriod -> [PhyloGroup]
567 getGroupsFromPeriods lvl periods =
568 elems $ view ( traverse
571 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
572 . phylo_scaleGroups ) periods
575 updatePhyloGroups :: Scale -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
576 updatePhyloGroups lvl m phylo =
581 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
585 let id = getGroupId g
591 updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
592 updatePeriods periods' phylo =
593 over (phylo_periods . traverse)
595 let prd' = periods' ! (prd ^. phylo_periodPeriod)
596 lvls = map (\lvl -> lvl & phylo_scalePeriodStr .~ prd') $ prd ^. phylo_periodScales
597 in prd & phylo_periodPeriodStr .~ prd'
598 & phylo_periodScales .~ lvls
601 updateQuality :: Double -> Phylo -> Phylo
602 updateQuality quality phylo = phylo { _phylo_quality = quality }
604 updateLevel :: Double -> Phylo -> Phylo
605 updateLevel level phylo = phylo { _phylo_level = level }
607 traceToPhylo :: Scale -> Phylo -> Phylo
608 traceToPhylo lvl phylo =
609 trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
610 <> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
611 <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo
617 mergeBranchIds :: [[Int]] -> [Int]
618 mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
620 -- | 2) find the most Up Left ids in the hierarchy of similarity
621 -- mostUpLeft :: [[Int]] -> [[Int]]
623 -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
624 -- inf = (fst . minimum) groupIds
625 -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
626 -- | 1) find the most frequent ids
627 mostFreq' :: [[Int]] -> [[Int]]
629 let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
630 sup = (fst . maximum) groupIds
631 in map snd $ filter (\gIds -> fst gIds == sup) groupIds
634 mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
635 mergeMeta bId groups =
636 let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
637 in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
640 groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
641 groupsToBranches' groups =
642 {- run the related component algorithm -}
643 let egos = map (\g -> [getGroupId g]
644 ++ (map fst $ g ^. phylo_groupPeriodParents)
645 ++ (map fst $ g ^. phylo_groupPeriodChilds)
646 ++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
647 graph = relatedComponents egos
648 {- update each group's branch id -}
650 let groups' = elems $ restrictKeys groups (Set.fromList ids)
651 bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
652 in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
655 relatedComponents :: Ord a => [[a]] -> [[a]]
656 relatedComponents graph = foldl' (\branches groups ->
658 then branches ++ [groups]
660 let branchPart = partition (\branch -> disjoint (Set.fromList branch) (Set.fromList groups)) branches
661 in (fst branchPart) ++ [nub $ concat $ (snd branchPart) ++ [groups]]) [] graph
664 toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
665 toRelatedComponents nodes edges =
666 let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
667 clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
668 in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
671 traceSynchronyEnd :: Phylo -> Phylo
672 traceSynchronyEnd phylo =
673 trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
674 <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
675 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
678 traceSynchronyStart :: Phylo -> Phylo
679 traceSynchronyStart phylo =
680 trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
681 <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
682 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
690 getSensibility :: PhyloSimilarity -> Double
691 getSensibility proxi = case proxi of
692 WeightedLogJaccard s _ -> s
693 WeightedLogSim s _ -> s
694 Hamming _ _ -> undefined
696 getMinSharedNgrams :: PhyloSimilarity -> Int
697 getMinSharedNgrams proxi = case proxi of
698 WeightedLogJaccard _ m -> m
699 WeightedLogSim _ m -> m
700 Hamming _ _ -> undefined
706 intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
707 intersectInit acc lst lst' =
708 if (null lst) || (null lst')
710 else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
711 then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
714 branchIdsToSimilarity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
715 branchIdsToSimilarity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
717 ngramsInBranches :: [[PhyloGroup]] -> [Int]
718 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
721 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
722 traceMatchSuccess thr qua qua' nextBranches =
723 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
724 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
725 <> ",(1.." <> show (length nextBranches) <> ")]"
726 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
727 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
728 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
731 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
732 traceMatchFailure thr qua qua' branches =
733 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
734 <> ",(1.." <> show (length branches) <> ")]"
735 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
736 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
740 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
741 traceMatchNoSplit branches =
742 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
743 <> ",(1.." <> show (length branches) <> ")]"
744 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
745 <> " - unable to split in smaller branches" <> "\n"
749 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
750 traceMatchLimit branches =
751 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
752 <> ",(1.." <> show (length branches) <> ")]"
753 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
754 <> " - unable to increase the threshold above 1" <> "\n"
758 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
759 traceMatchEnd groups =
760 trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
761 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups
764 traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
765 traceTemporalMatching groups =
766 trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
769 traceGroupsProxi :: [Double] -> [Double]
771 trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups Similarity" <> "\n") l