2 Module : Gargantext.Core.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE ViewPatterns #-}
13 module Gargantext.Core.Viz.Phylo.PhyloTools where
15 import Control.Lens hiding (Level)
16 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, iterate, transpose, partition, tails, nubBy, group, notElem, (!!))
17 import Data.Map (Map, elems, fromList, findWithDefault, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
18 import Data.Set (Set, disjoint)
19 import Data.String (String)
20 import Data.Text (Text,unpack)
21 import Data.Vector (Vector, elemIndex)
22 import Debug.Trace (trace)
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Prelude
25 import Prelude (floor,read)
27 import qualified Data.List as List
28 import qualified Data.Set as Set
29 import qualified Data.Text as Text
30 import qualified Data.Vector as Vector
31 import qualified Data.Map as Map
37 -- | To print an important message as an IO()
38 printIOMsg :: String -> IO ()
43 <> "-- | " <> msg <> "\n" )
46 -- | To print a comment as an IO()
47 printIOComment :: String -> IO ()
49 putStrLn ( "\n" <> cmt <> "\n" )
56 -- truncate' :: Double -> Int -> Double
57 -- truncate' x n = (fromIntegral (floor (x * t))) / t
60 truncate' :: Double -> Int -> Double
61 truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
67 getInMap :: Int -> Map Int Double -> Double
73 roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
74 roundToStr = printf "%0.*f"
77 countSup :: Double -> [Double] -> Int
78 countSup s l = length $ filter (>s) l
81 dropByIdx :: Int -> [a] -> [a]
82 dropByIdx k l = take k l ++ drop (k+1) l
85 elemIndex' :: Eq a => a -> [a] -> Int
86 elemIndex' e l = case (List.elemIndex e l) of
87 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
91 commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
92 commonPrefix lst lst' acc =
93 if (null lst || null lst')
95 else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
96 then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
100 ---------------------
101 -- | Foundations | --
102 ---------------------
105 -- | Is this Ngrams a Foundations Root ?
106 isRoots :: Ngrams -> Vector Ngrams -> Bool
107 isRoots n ns = Vector.elem n ns
109 -- | To transform a list of nrams into a list of foundation's index
110 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
111 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
113 -- | To transform a list of sources into a list of sources' index
114 sourcesToIdx :: [Text] -> Vector Text -> [Int]
115 sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
117 -- | To transform a list of Ngrams Indexes into a Label
118 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
119 ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
121 idxToLabel :: [Int] -> String
122 idxToLabel l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
124 idxToLabel' :: [Double] -> String
125 idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
127 -- | To transform a list of Ngrams Indexes into a list of Text
128 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
129 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
136 -- | To transform a list of periods into a set of Dates
137 periodsToYears :: [(Date,Date)] -> Set Date
138 periodsToYears periods = (Set.fromList . sort . concat)
139 $ map (\(d,d') -> [d..d']) periods
142 findBounds :: [Date] -> (Date,Date)
144 let dates' = sort dates
145 in (head' "findBounds" dates', last' "findBounds" dates')
148 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
149 toPeriods dates p s =
150 let (start,end) = findBounds dates
151 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
152 $ chunkAlong p s [start .. end]
155 toFstDate :: [Text] -> Text
160 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
163 toLstDate :: [Text] -> Text
169 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
173 getTimeScale :: Phylo -> [Char]
174 getTimeScale p = case (timeUnit $ getConfig p) of
175 Epoch _ _ _ -> "epoch"
177 Month _ _ _ -> "month"
182 -- | Get a regular & ascendante timeScale from a given list of dates
183 toTimeScale :: [Date] -> Int -> [Date]
184 toTimeScale dates step =
185 let (start,end) = findBounds dates
186 in [start, (start + step) .. end]
189 getTimeStep :: TimeUnit -> Int
190 getTimeStep time = case time of
197 getTimePeriod :: TimeUnit -> Int
198 getTimePeriod time = case time of
205 getTimeFrame :: TimeUnit -> Int
206 getTimeFrame time = case time of
218 -- | To find if l' is nested in l
219 isNested :: Eq a => [a] -> [a] -> Bool
222 | length l' > length l = False
223 | (union l l') == l = True
227 -- | To filter Fis with small Support but by keeping non empty Periods
228 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
229 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
230 then keepFilled f (thr - 1) l
234 traceClique :: Map (Date, Date) [Clustering] -> String
235 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
237 --------------------------------------
239 cliques = sort $ map (fromIntegral . length . _clustering_roots) $ concat $ elems mFis
240 --------------------------------------
243 traceSupport :: Map (Date, Date) [Clustering] -> String
244 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
246 --------------------------------------
248 supports = sort $ map (fromIntegral . _clustering_support) $ concat $ elems mFis
249 --------------------------------------
252 traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
253 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
254 <> "Support : " <> (traceSupport mFis) <> "\n"
255 <> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
263 getCliqueSupport :: Cluster -> Int
264 getCliqueSupport unit = case unit of
268 getCliqueSize :: Cluster -> Int
269 getCliqueSize unit = case unit of
278 listToCombi' :: [a] -> [(a,a)]
279 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
281 listToEqual' :: Eq a => [a] -> [(a,a)]
282 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
284 listToKeys :: Eq a => [a] -> [(a,a)]
285 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
287 listToMatrix :: [Int] -> Map (Int,Int) Double
288 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
290 listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
291 listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
293 listToSeq :: Eq a => [a] -> [(a,a)]
294 listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
296 sumCooc :: Cooc -> Cooc -> Cooc
297 sumCooc cooc cooc' = unionWith (+) cooc cooc'
299 getTrace :: Cooc -> Double
300 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
302 coocToDiago :: Cooc -> Cooc
303 coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
305 coocToAdjacency :: Cooc -> Cooc
306 coocToAdjacency cooc = Map.map (\_ -> 1) cooc
308 -- | To build the local cooc matrix of each phylogroup
309 ngramsToCooc :: [Int] -> [Cooc] -> Cooc
310 ngramsToCooc ngrams coocs =
311 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
312 pairs = listToKeys ngrams
313 in filterWithKey (\k _ -> elem k pairs) cooc
321 -- | To build the density of a phylogroup
322 -- density is defined in Callon M, Courtial JP, Laville F (1991) Co-word analysis as a tool for describing
323 -- the network of interaction between basic and technological research: The case of polymer chemistry.
324 -- Scientometric 22: 155–205.
325 ngramsToDensity :: [Int] -> [Cooc] -> (Map Int Double) -> Double
326 ngramsToDensity ngrams coocs rootsCount =
327 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
328 pairs = listToCombi' ngrams
329 density = map (\(i,j) ->
330 let nij = findWithDefault 0 (i,j) cooc
331 in (nij * nij) / ((rootsCount ! i) * (rootsCount ! j))) pairs
332 in (sum density) / (fromIntegral $ length ngrams)
341 -- | find the local maxima in a list of values
342 findMaxima :: [(Double,Double)] -> [Bool]
343 findMaxima lst = map (hasMax) $ toChunk 3 lst
346 hasMax :: [(Double,Double)] -> Bool
348 if (length chunk) /= 3
350 else (snd(chunk !! 0) < snd(chunk !! 1)) && (snd(chunk !! 2) < snd(chunk !! 1))
353 -- | split a list into chunks of size n
354 toChunk :: Int -> [a] -> [[a]]
355 toChunk n = takeWhile ((== n) . length) . transpose . take n . iterate tail
358 -- | To compute the average degree from a cooc matrix
359 -- http://networksciencebook.com/chapter/2#degree
360 toAverageDegree :: Cooc -> Vector Ngrams -> Double
361 toAverageDegree cooc roots = 2 * (fromIntegral $ Map.size cooc) / (fromIntegral $ Vector.length roots)
364 -- | Use the giant component regime to estimate the default level
365 -- http://networksciencebook.com/chapter/3#networks-supercritical
366 regimeToDefaultLevel :: Cooc -> Vector Ngrams -> Double
367 regimeToDefaultLevel cooc roots
369 | avg < 1 = avg * 0.6
371 | avg < lnN = (avg * 0.2) / lnN
375 avg = toAverageDegree cooc roots
377 lnN = log (fromIntegral $ Vector.length roots)
379 coocToConfidence :: Phylo -> Cooc
380 coocToConfidence phylo =
381 let count = getRootsCount phylo
382 cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty
383 $ elems $ getCoocByDate phylo
384 in Map.mapWithKey (\(a,b) w -> confidence a b w count) cooc
388 confidence :: Int -> Int -> Double -> Map Int Double -> Double
389 confidence a b inter card = maximum [(inter / card ! a),(inter / card ! b)]
392 sumtest :: [Int] -> [Int] -> Int
393 sumtest l1 l2 = (head' "test" l1) + (head' "test" $ reverse l2)
396 findDefaultLevel :: Phylo -> Phylo
397 findDefaultLevel phylo =
398 let confidence = Map.filterWithKey (\(a,b) _ -> a /= b)
399 $ Map.filter (> 0.01)
400 $ coocToConfidence phylo
401 roots = getRoots phylo
402 level = regimeToDefaultLevel confidence roots
403 in updateLevel level phylo
410 getGroupId :: PhyloGroup -> PhyloGroupId
411 getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupScale), g ^. phylo_groupIndex)
413 getGroupNgrams :: PhyloGroup -> [Int]
414 getGroupNgrams g = g ^. phylo_groupNgrams
416 idToPrd :: PhyloGroupId -> Period
417 idToPrd id = (fst . fst) id
419 groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
420 groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
422 getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
423 getPeriodPointers fil g =
425 ToChilds -> g ^. phylo_groupPeriodChilds
426 ToParents -> g ^. phylo_groupPeriodParents
427 ToChildsMemory -> undefined
428 ToParentsMemory -> undefined
430 filterSimilarity :: PhyloSimilarity -> Double -> Double -> Bool
431 filterSimilarity similarity thr local =
433 WeightedLogJaccard _ _ -> local >= thr
434 WeightedLogSim _ _ -> local >= thr
435 Hamming _ _ -> undefined
437 getSimilarityName :: PhyloSimilarity -> String
438 getSimilarityName similarity =
440 WeightedLogJaccard _ _ -> "WLJaccard"
441 WeightedLogSim _ _ -> "WeightedLogSim"
442 Hamming _ _ -> "Hamming"
448 addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
449 addPointers fil pty pointers g =
451 TemporalPointer -> case fil of
452 ToChilds -> g & phylo_groupPeriodChilds .~ pointers
453 ToParents -> g & phylo_groupPeriodParents .~ pointers
454 ToChildsMemory -> undefined
455 ToParentsMemory -> undefined
456 ScalePointer -> case fil of
457 ToChilds -> g & phylo_groupScaleChilds .~ pointers
458 ToParents -> g & phylo_groupScaleParents .~ pointers
459 ToChildsMemory -> undefined
460 ToParentsMemory -> undefined
462 toPointer' :: Double -> Pointer -> Pointer'
463 toPointer' thr pt = (fst pt,(thr,snd pt))
466 addMemoryPointers :: Filiation -> PointerType -> Double -> [Pointer] -> PhyloGroup -> PhyloGroup
467 addMemoryPointers fil pty thr pointers g =
469 TemporalPointer -> case fil of
470 ToChilds -> undefined
471 ToParents -> undefined
472 ToChildsMemory -> g & phylo_groupPeriodMemoryChilds .~ (concat [(g ^. phylo_groupPeriodMemoryChilds),(map (\pt -> toPointer' thr pt) pointers)])
473 ToParentsMemory -> g & phylo_groupPeriodMemoryParents .~ (concat [(g ^. phylo_groupPeriodMemoryParents),(map (\pt -> toPointer' thr pt) pointers)])
474 ScalePointer -> undefined
477 getPeriodIds :: Phylo -> [(Date,Date)]
478 getPeriodIds phylo = sortOn fst
480 $ phylo ^. phylo_periods
482 getLastDate :: Phylo -> Date
483 getLastDate phylo = snd $ last' "lastDate" $ getPeriodIds phylo
485 getLevelParentId :: PhyloGroup -> PhyloGroupId
486 getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
488 getLastLevel :: Phylo -> Scale
489 getLastLevel phylo = last' "lastLevel" $ getScales phylo
491 getScales :: Phylo -> [Scale]
492 getScales phylo = nub
494 $ keys $ view ( phylo_periods
496 . phylo_periodScales ) phylo
498 getSeaElevation :: Phylo -> SeaElevation
499 getSeaElevation phylo = seaElevation (getConfig phylo)
501 getSimilarity :: Phylo -> PhyloSimilarity
502 getSimilarity phylo = similarity (getConfig phylo)
505 getPhyloSeaRiseStart :: Phylo -> Double
506 getPhyloSeaRiseStart phylo = case (getSeaElevation phylo) of
511 getPhyloSeaRiseSteps :: Phylo -> Double
512 getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of
518 getConfig :: Phylo -> PhyloConfig
519 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
521 getLevel :: Phylo -> Double
522 getLevel phylo = (phyloQuality (getConfig phylo)) ^. qua_granularity
524 getLadder :: Phylo -> [Double]
525 getLadder phylo = phylo ^. phylo_seaLadder
527 getCoocByDate :: Phylo -> Map Date Cooc
528 getCoocByDate phylo = coocByDate (phylo ^. phylo_counts)
530 getRootsCountByDate :: Phylo -> Map Date (Map Int Double)
531 getRootsCountByDate phylo = rootsCountByDate (phylo ^. phylo_counts)
533 getDocsByDate :: Phylo -> Map Date Double
534 getDocsByDate phylo = docsByDate (phylo ^. phylo_counts)
536 getRootsCount :: Phylo -> Map Int Double
537 getRootsCount phylo = rootsCount (phylo ^. phylo_counts)
539 getRootsFreq :: Phylo -> Map Int Double
540 getRootsFreq phylo = rootsFreq (phylo ^. phylo_counts)
542 getLastRootsFreq :: Phylo -> Map Int Double
543 getLastRootsFreq phylo = lastRootsFreq (phylo ^. phylo_counts)
545 setConfig :: PhyloConfig -> Phylo -> Phylo
546 setConfig config phylo = phylo
547 & phylo_param .~ (PhyloParam
548 ((phylo ^. phylo_param) ^. phyloParam_version)
549 ((phylo ^. phylo_param) ^. phyloParam_software)
552 -- & phylo_param & phyloParam_config & phyloParam_config .~ config
555 getRoots :: Phylo -> Vector Ngrams
556 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
558 getRootsInGroups :: Phylo -> Map Int [PhyloGroupId]
559 getRootsInGroups phylo = (phylo ^. phylo_foundations) ^. foundations_rootsInGroups
561 getSources :: Phylo -> Vector Text
562 getSources phylo = _sources (phylo ^. phylo_sources)
565 -- get the groups distributed by branches at the last scale
566 phyloLastScale :: Phylo -> [[PhyloGroup]]
567 phyloLastScale phylo = elems
569 $ map (\g -> (g ^. phylo_groupBranchId, [g]))
570 $ getGroupsFromScale (last' "byBranches" $ getScales phylo) phylo
572 getGroupsFromScale :: Scale -> Phylo -> [PhyloGroup]
573 getGroupsFromScale lvl phylo =
574 elems $ view ( phylo_periods
578 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
579 . phylo_scaleGroups ) phylo
582 getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
583 getGroupsFromScalePeriods lvl periods phylo =
584 elems $ view ( phylo_periods
586 . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
589 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
590 . phylo_scaleGroups ) phylo
593 getGroupsFromPeriods :: Scale -> Map Period PhyloPeriod -> [PhyloGroup]
594 getGroupsFromPeriods lvl periods =
595 elems $ view ( traverse
598 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
599 . phylo_scaleGroups ) periods
602 updatePhyloGroups :: Scale -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
603 updatePhyloGroups lvl m phylo =
608 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
612 let id = getGroupId g
618 updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
619 updatePeriods periods' phylo =
620 over (phylo_periods . traverse)
622 let prd' = periods' ! (prd ^. phylo_periodPeriod)
623 lvls = map (\lvl -> lvl & phylo_scalePeriodStr .~ prd') $ prd ^. phylo_periodScales
624 in prd & phylo_periodPeriodStr .~ prd'
625 & phylo_periodScales .~ lvls
628 updateQuality :: Double -> Phylo -> Phylo
629 updateQuality quality phylo = phylo { _phylo_quality = quality }
631 updateLevel :: Double -> Phylo -> Phylo
632 updateLevel level phylo = phylo { _phylo_level = level }
634 traceToPhylo :: Scale -> Phylo -> Phylo
635 traceToPhylo lvl phylo =
636 trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
637 <> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
638 <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo
644 mergeBranchIds :: [[Int]] -> [Int]
645 mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
647 -- | 2) find the most Up Left ids in the hierarchy of similarity
648 -- mostUpLeft :: [[Int]] -> [[Int]]
650 -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
651 -- inf = (fst . minimum) groupIds
652 -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
653 -- | 1) find the most frequent ids
654 mostFreq' :: [[Int]] -> [[Int]]
656 let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
657 sup = (fst . maximum) groupIds
658 in map snd $ filter (\gIds -> fst gIds == sup) groupIds
661 mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
662 mergeMeta bId groups =
663 let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
664 in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
667 groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
668 groupsToBranches' groups =
669 {- run the related component algorithm -}
670 let egos = map (\g -> [getGroupId g]
671 ++ (map fst $ g ^. phylo_groupPeriodParents)
672 ++ (map fst $ g ^. phylo_groupPeriodChilds)
673 ++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
674 graph = relatedComponents egos
675 {- update each group's branch id -}
677 let groups' = elems $ restrictKeys groups (Set.fromList ids)
678 bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
679 in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
682 relatedComponents :: Ord a => [[a]] -> [[a]]
683 relatedComponents graph = foldl' (\branches groups ->
685 then branches ++ [groups]
687 let branchPart = partition (\branch -> disjoint (Set.fromList branch) (Set.fromList groups)) branches
688 in (fst branchPart) ++ [nub $ concat $ (snd branchPart) ++ [groups]]) [] graph
691 toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
692 toRelatedComponents nodes edges =
693 let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
694 clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
695 in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
698 traceSynchronyEnd :: Phylo -> Phylo
699 traceSynchronyEnd phylo =
700 trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
701 <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
702 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
705 traceSynchronyStart :: Phylo -> Phylo
706 traceSynchronyStart phylo =
707 trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
708 <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
709 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
717 getSensibility :: PhyloSimilarity -> Double
718 getSensibility proxi = case proxi of
719 WeightedLogJaccard s _ -> s
720 WeightedLogSim s _ -> s
721 Hamming _ _ -> undefined
723 getMinSharedNgrams :: PhyloSimilarity -> Int
724 getMinSharedNgrams proxi = case proxi of
725 WeightedLogJaccard _ m -> m
726 WeightedLogSim _ m -> m
727 Hamming _ _ -> undefined
733 intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
734 intersectInit acc lst lst' =
735 if (null lst) || (null lst')
737 else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
738 then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
741 branchIdsToSimilarity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
742 branchIdsToSimilarity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
744 ngramsInBranches :: [[PhyloGroup]] -> [Int]
745 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
748 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
749 traceMatchSuccess thr qua qua' nextBranches =
750 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
751 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
752 <> ",(1.." <> show (length nextBranches) <> ")]"
753 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
754 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
755 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
758 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
759 traceMatchFailure thr qua qua' branches =
760 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
761 <> ",(1.." <> show (length branches) <> ")]"
762 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
763 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
767 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
768 traceMatchNoSplit branches =
769 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
770 <> ",(1.." <> show (length branches) <> ")]"
771 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
772 <> " - unable to split in smaller branches" <> "\n"
776 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
777 traceMatchLimit branches =
778 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
779 <> ",(1.." <> show (length branches) <> ")]"
780 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
781 <> " - unable to increase the threshold above 1" <> "\n"
785 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
786 traceMatchEnd groups =
787 trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
788 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups
791 traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
792 traceTemporalMatching groups =
793 trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
796 traceGroupsProxi :: [Double] -> [Double]
798 trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups Similarity" <> "\n") l