2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Viz.Phylo.PhyloExport where
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
16 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Viz.AdaptativePhylo
22 import Gargantext.Viz.Phylo.PhyloTools
25 import Data.GraphViz hiding (DotGraph, Order)
26 import Data.GraphViz.Types.Generalised (DotGraph)
27 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
28 import Data.GraphViz.Types.Monadic
29 import Data.Text.Lazy (fromStrict, pack, unpack)
30 import System.FilePath
31 import Debug.Trace (trace)
33 import qualified Data.Text as Text
34 import qualified Data.Vector as Vector
35 import qualified Data.Text.Lazy as Lazy
36 import qualified Data.GraphViz.Attributes.HTML as H
42 dotToFile :: FilePath -> DotGraph DotId -> IO ()
43 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
45 dotToString :: DotGraph DotId -> [Char]
46 dotToString dotG = unpack (printDotGraph dotG)
48 dynamicToColor :: Double -> H.Attribute
50 | d == 0 = H.BGColor (toColor LightCoral)
51 | d == 1 = H.BGColor (toColor Khaki)
52 | d == 2 = H.BGColor (toColor SkyBlue)
53 | otherwise = H.Color (toColor Black)
55 pickLabelColor :: [Double] -> H.Attribute
57 | elem 0 lst = dynamicToColor 0
58 | elem 2 lst = dynamicToColor 2
59 | elem 1 lst = dynamicToColor 1
60 | otherwise = dynamicToColor 3
62 toDotLabel :: Text.Text -> Label
63 toDotLabel lbl = StrLabel $ fromStrict lbl
65 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
66 toAttr k v = customAttribute k v
68 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
69 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
71 groupIdToDotId :: PhyloGroupId -> DotId
72 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
74 branchIdToDotId :: PhyloBranchId -> DotId
75 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
77 periodIdToDotId :: PhyloPeriodId -> DotId
78 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
80 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
81 groupToTable fdt g = H.Table H.HTable
82 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
83 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
84 , H.tableRows = [header]
85 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
86 <> ( map ngramsToRow $ splitEvery 4
87 $ reverse $ sortOn (snd . snd)
88 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
89 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
91 --------------------------------------
92 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
93 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
94 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
95 --------------------------------------
98 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
99 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
100 <> (fromStrict " ( ")
101 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
102 <> (fromStrict " , ")
103 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
104 <> (fromStrict " ) ")
105 <> (pack $ show (getGroupId g)))]]
106 --------------------------------------
108 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
109 branchToDotNode b bId =
110 node (branchIdToDotId $ b ^. branch_id)
111 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
112 <> (metaToAttr $ b ^. branch_meta)
113 <> [ toAttr "nodeType" "branch"
114 , toAttr "bId" (pack $ show bId)
115 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
116 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
117 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
118 , toAttr "label" (pack $ show $ b ^. branch_label)
121 periodToDotNode :: (Date,Date) -> Dot DotId
122 periodToDotNode prd =
123 node (periodIdToDotId prd)
124 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
125 <> [ toAttr "nodeType" "period"
126 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
127 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
130 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
131 groupToDotNode fdt g bId =
132 node (groupIdToDotId $ getGroupId g)
133 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
134 <> [ toAttr "nodeType" "group"
135 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
136 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
137 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
138 , toAttr "bId" (pack $ show bId)
139 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
142 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
143 toDotEdge source target lbl edgeType = edge source target
145 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
146 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
147 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
148 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
149 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
150 , Label (StrLabel $ fromStrict lbl)]
151 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
154 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
155 mergePointers groups =
156 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
157 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
158 in unionWith (\w w' -> max w w') toChilds toParents
161 toBid :: PhyloGroup -> [PhyloBranch] -> Int
163 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
164 in fromJust $ elemIndex b' bs
166 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
167 exportToDot phylo export =
168 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
169 <> show(length $ export ^. export_groups) <> " groups "
170 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
171 <> "##########################") $
172 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
174 -- | 1) init the dot graph
175 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
176 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
178 , Style [SItem Filled []],Color [toWColor White]]
179 -- | home made attributes
180 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
181 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
182 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
183 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
184 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
185 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
189 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
191 -- | 2) create a layer for the branches labels
192 subgraph (Str "Branches peaks") $ do
194 graphAttrs [Rank SameRank]
196 -- | 3) group the branches by hierarchy
197 -- mapM (\branches ->
198 -- subgraph (Str "Branches clade") $ do
199 -- graphAttrs [Rank SameRank]
201 -- -- | 4) create a node for each branch
202 -- mapM branchToDotNode branches
203 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
205 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
207 -- | 5) create a layer for each period
208 _ <- mapM (\period ->
209 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
210 graphAttrs [Rank SameRank]
211 periodToDotNode period
213 -- | 6) create a node for each group
214 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
215 ) $ getPeriodIds phylo
217 -- | 7) create the edges between a branch and its first groups
218 _ <- mapM (\(bId,groups) ->
219 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
222 $ map (\groups -> head' "toDot"
223 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
224 $ sortOn (fst . _phylo_groupPeriod) groups)
225 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
227 -- | 8) create the edges between the groups
228 _ <- mapM (\((k,k'),_) ->
229 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
230 ) $ (toList . mergePointers) $ export ^. export_groups
232 -- | 7) create the edges between the periods
233 _ <- mapM (\(prd,prd') ->
234 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
235 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
237 -- | 8) create the edges between the branches
238 -- _ <- mapM (\(bId,bId') ->
239 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
240 -- (Text.pack $ show(branchIdsToProximity bId bId'
241 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
242 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
243 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
246 graphAttrs [Rank SameRank]
256 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
257 filterByBranchSize thr export =
258 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
259 in export & export_branches .~ (fst branches')
260 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
263 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
264 processFilters filters qua export =
265 foldl (\export' f -> case f of
266 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
267 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
268 else filterByBranchSize thr export'
275 branchToIso :: [PhyloBranch] -> [PhyloBranch]
276 branchToIso branches =
279 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
281 $ ([0] ++ (map (\(b,b') ->
282 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
283 in (b' ^. branch_seaLevel) !! (idx - 1)
284 ) $ listToSeq branches))
285 in map (\(x,b) -> b & branch_x .~ x)
289 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
290 sortByHierarchy depth branches =
291 if (length branches == 1)
292 then branchToIso branches
293 else branchToIso $ concat
295 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
296 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
297 ++ (sortByHierarchy (depth + 1) (snd partitions)))
298 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
299 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
302 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
303 sortByBirthDate order export =
304 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
305 branches' = case order of
307 Desc -> reverse branches
308 in export & export_branches .~ branches'
310 processSort :: Sort -> PhyloExport -> PhyloExport
311 processSort sort' export = case sort' of
312 ByBirthDate o -> sortByBirthDate o export
313 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
320 -- | Return the conditional probability of i knowing j
321 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
322 conditional m i j = (findWithDefault 0 (i,j) m)
326 -- | Return the genericity score of a given ngram
327 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
328 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
329 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
332 -- | Return the specificity score of a given ngram
333 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
334 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
335 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
338 -- | Return the inclusion score of a given ngram
339 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
340 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
341 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
344 ngramsMetrics :: PhyloExport -> PhyloExport
345 ngramsMetrics export =
348 (\g -> g & phylo_groupMeta %~ insert "genericity"
349 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
350 & phylo_groupMeta %~ insert "specificity"
351 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
352 & phylo_groupMeta %~ insert "inclusion"
353 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
357 branchDating :: PhyloExport -> PhyloExport
358 branchDating export =
359 over ( export_branches
362 let groups = sortOn fst
363 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
364 then acc ++ [g ^. phylo_groupPeriod]
365 else acc ) [] $ export ^. export_groups
367 birth = fst $ head' "birth" groups
368 age = (snd $ last' "age" groups) - birth
369 in b & branch_meta %~ insert "birth" [fromIntegral birth]
370 & branch_meta %~ insert "age" [fromIntegral age]
371 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
373 processMetrics :: PhyloExport -> PhyloExport
374 processMetrics export = ngramsMetrics
375 $ branchDating export
382 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
383 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
386 $ sortOn snd $ zip [0..] meta
389 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
390 mostInclusive nth foundations export =
391 over ( export_branches
394 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
395 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
396 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
397 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
398 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
399 in b & branch_label .~ lbl ) export
402 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
403 mostEmergentInclusive nth foundations export =
407 let lbl = ngramsToLabel foundations
409 $ map (\(_,(_,idx)) -> idx)
411 $ map (\groups -> sortOn (fst . snd) groups)
412 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
413 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
414 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
415 in g & phylo_groupLabel .~ lbl ) export
418 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
419 processLabels labels foundations export =
420 foldl (\export' label ->
422 GroupLabel tagger nth ->
424 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
425 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
426 BranchLabel tagger nth ->
428 MostInclusive -> mostInclusive nth foundations export'
429 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
437 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
438 toDynamics n parents g m =
439 let prd = g ^. phylo_groupPeriod
440 end = last' "dynamics" (sort $ map snd $ elems m)
441 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
444 else if ((fst prd) == (fst $ m ! n))
452 --------------------------------------
454 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
457 processDynamics :: [PhyloGroup] -> [PhyloGroup]
458 processDynamics groups =
460 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
461 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
462 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
464 --------------------------------------
465 mapNgrams :: Map Int (Date,Date)
466 mapNgrams = map (\dates ->
467 let dates' = sort dates
468 in (head' "dynamics" dates', last' "dynamics" dates'))
470 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
471 $ (g ^. phylo_groupNgrams))) [] groups
474 ---------------------
475 -- | phyloExport | --
476 ---------------------
478 toPhyloExport :: Phylo -> DotGraph DotId
479 toPhyloExport phylo = exportToDot phylo
480 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
481 $ processSort (exportSort $ getConfig phylo)
482 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
483 $ processMetrics export
485 export :: PhyloExport
486 export = PhyloExport groups branches
487 --------------------------------------
488 branches :: [PhyloBranch]
489 branches = map (\g ->
490 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
491 breaks = (g ^. phylo_groupMeta) ! "breaks"
492 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
493 in PhyloBranch (g ^. phylo_groupBranchId)
497 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
501 $ map (\gs -> head' "export" gs)
502 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
503 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
504 --------------------------------------
505 groups :: [PhyloGroup]
506 groups = traceExportGroups
508 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
509 $ tracePhyloInfo phylo
512 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
513 traceExportBranches branches = trace ("\n"
514 <> "-- | Export " <> show(length branches) <> " branches") branches
516 tracePhyloInfo :: Phylo -> Phylo
517 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
518 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
519 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
523 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
524 traceExportGroups groups = trace ("\n" <> "-- | Export "
525 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
526 <> show(length groups) <> " groups and "
527 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"