{-# LANGUAGE OverloadedStrings #-} module Phylomemy.DOT where import Control.Applicative (Applicative (..)) import Control.Monad (Monad (..), foldM_, forM_, mapM_, when, zipWithM_) import Control.Monad.Trans.Class qualified as MT import Control.Monad.Trans.Reader qualified as MT import Control.Monad.Trans.Writer.CPS qualified as MT import Data.Bool (otherwise, (&&)) import Data.ByteString.Builder qualified as BS import Data.ByteString.Short qualified as BSh import Data.Eq (Eq (..)) import Data.Foldable (foldMap', toList) import Data.Function (id, on, ($), (&), (.)) import Data.Functor ((<&>)) import Data.Int (Int) import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Maybe (Maybe (..), fromMaybe) import Data.Monoid (Monoid (..)) import Data.Ord (Ord (..)) import Data.Semigroup (Min (..), Semigroup (..)) import Data.Set (Set) import Data.Set qualified as Set import Data.String (String) import Data.Text.Lazy qualified as LazyText import Data.Text.Lazy.Encoding qualified as LazyText import Data.Text.Short qualified as TS import Data.Tree qualified as Tree import Debug.Pretty.Simple (pTraceShow) import GHC.Real (floor) import GHC.Stack (HasCallStack) import Numeric (showFFloat) import Numeric.Probability import Text.Pretty.Simple (pShow) import Text.Printf (printf) import Text.Show (Show (..)) import Prelude import Phylomemy.Indexation import Phylomemy.TemporalMatching type MST = Int type ClusterGroup = Int -- | @(`dotMaximalSpanningForest` scaleToMsf)@ -- returns a graph of the given `MaximalSpanningForest` -- in [DOT](https://graphviz.org/doc/info/lang.html) format. -- -- Each maximal spanning tree is here pruned of its edges -- from the lowest `Similarity` it contains, to the highest, -- each level is called a `Scale`, and groups `(range, cluster)` differently, -- hence this actually returns a `Map.Map` of `Scale` to graph. dotMaximalSpanningForest :: forall range cluster. HasCallStack => cluster ~ Cluster => Show range => Show cluster => Ord range => Ord cluster => ShowHuman range => ShowHuman cluster => MaximalSpanningForest range cluster -> Int :-> BS.Builder dotMaximalSpanningForest msf = -- pTraceShow ("scaleToRangeToMstToGroupToClusters", Map.findMin scaleToRangeToMstToGroupToClusters) $ scaleToRangeToMstToGroupToClusters & Map.mapWithKey \scaleI rangeToMstToGroupToClusters -> runDOT do -- forM_ (similToRangeToMstToGroupToClusters & Map.toList) \(minSimil, rangeToMstToGroupToClusters) -> do dotComments [(BS.lazyByteString $ LazyText.encodeUtf8 $ pShow $ rangeToMstToGroupToClusters & Map.map Map.keys)] -- pTraceShow ("num of nodes", Map.size nodeToBranch, "num of branches", Map.size msf) $ dotLine "digraph g" dotBlock do dotLine "splines=\"ortho\"" indexFrom1M_ (rangeToMstToGroupToClusters & Map.toList) \(srcR, mstToGroupToClusters) srcRI -> do let srcRB = "r" <> BS.intDec srcRI dotLine $ "subgraph cluster_" <> srcRB dotBlock do dotComments ["Create a node for the range " <> srcRB] dotNode srcRB [ ("shape", "box") , ("label", builderQuotedString (showHuman srcR)) , ("color", "gray") , ("style", "filled") , ("fillcolor", "gray") ] dotLine "color=gray" dotBlock do dotLine "rank=same" dotComments ["Create the cluster nodes within the range " <> srcRB] forM_ (mstToGroupToClusters & Map.toList) \(mstI, groupToClusters) -> do forM_ (groupToClusters & Map.toList) \(srcGroup, srcClusters) -> do dotNodeCluster srcRI mstI srcGroup [ ( "label" , builderQuotedString $ (srcClusters & toList <&> showHuman & List.unlines) <> "\nT" <> printf "%03d" mstI <> "\nS" <> show scaleI -- <> {-maybe ""-} (("\n" <>) . showSimilarity) minSimil ) , ("style", "filled") , -- , minSimil & {-maybe ("", "")-} (\s -> ("fillcolor", 1 + ((floor (runProbability s * 10)) `mod` 10) & BS.intDec)) ("colorscheme", "ylorrd9") , ("shape", "box") ] dotComments ["Horizontally align the cluster nodes within the same range"] let row = [ (mstI, group) | (mstI, groupToClusters) <- mstToGroupToClusters & Map.toList , (group, _clusters) <- groupToClusters & Map.toList ] case row of [] -> return () c@(firstMst, firstGroup) : cs -> do dotEdges [srcRB, srcRB <> "t" <> BS.intDec firstMst <> "c" <> BS.intDec firstGroup] [ ("style", "invis") ] cs & (`foldM_` c) \(srcMst, srcGroup) dst@(dstMst, dstGroup) -> do dotEdgesCluster [(srcRI, srcMst, srcGroup), (srcRI, dstMst, dstGroup)] [ ("weight", "10") , ("style", "invis") ] return dst indexFrom1M_ sortedMSF \mst mstI -> do dotComments ["Create the edges of the MST " <> BS.intDec mstI] -- pTraceShowM (mstI, List.length (Tree.flatten mst)) let loop (Tree.Node MSTNode{mstNodeRangeCluster = src} dsts) = do forM_ dsts \dstNode@(Tree.Node MSTNode{mstNodeRangeCluster = dst, mstNodeSimilarity = simil} _) -> do -- let similB = BS.stringUtf8 $ showFFloat (Just 2) (simil & runProbability & fromRational @Double) "" let indexRangeCluster (r, c) = let clusterToGroup :: cluster :-> ClusterGroup = Map.fromList [ (cluster, group) | (group, clusters) <- rangeToMstToGroupToClusters & Map.lookup r & fromMaybe Map.empty & Map.lookup mstI & fromMaybe Map.empty & Map.toList , cluster <- clusters & Set.toList ] in ( 1 + Map.findIndex r rangeToMstToGroupToClusters , mstI , Map.lookup c clusterToGroup & fromMaybe (error (LazyText.unpack (pShow ("r", r, "c", c {-, "clusterToGroup", clusterToGroup, "rangeToMstToGroupToClusters", rangeToMstToGroupToClusters-})))) ) dotEdgesCluster [ indexRangeCluster src , indexRangeCluster dst ] [ ("constraint", "false") , ("color", (floor (runProbability simil * 10)) `mod` 10 & BS.intDec) , ("colorscheme", "ylorrd9") , -- , ("label", similB) ("fontcolor", "blue") , ("dir", "both") , ("arrowhead", "dot") , ("arrowtail", "dot") ] loop dstNode loop mst dotRanges rangeToMstToGroupToClusters where -- TODO: improve this, this is just a quick attempt to stabilize the DOT sortedMSF = msf & List.sortBy (compare `on` mstNodeRangeCluster . Tree.rootLabel) -- Deep remapping of the `msf` to something more suitable for generating the DOT scaleToRangeToMstToGroupToClusters :: {-scale-} Int :-> range :-> MST :-> ClusterGroup :-> Set cluster = let merge = Map.unionWith (Map.unionWith (Map.unionWith Set.union)) in Map.unionsWith merge [ Map.unionsWith merge [ Map.unionsWith merge [ Map.fromListWith merge $ [ {-(if scaleI == 3 then pTraceShow (["scaleToRangeToMstToGroupToClusters"], "scaleI", scaleI, "scaleSimil", scaleSimil, "range", range, "mstI", mstI, "clusterGroup", clusterGroup, "cluster", cluster) else id) $-} (scaleI,) $ Map.singleton range $ Map.singleton mstI $ Map.singleton clusterGroup $ Set.singleton cluster | MSTNode{mstNodeRangeCluster = (range, cluster)} <- scaleMST & toList ] | (clusterGroup, scaleMST) <- -- (if mstI == 2 then pTraceShow ("scaleI", scaleI, "scaleMSF", scaleMSF) else id) $ scaleMsf & List.zip [1 :: ClusterGroup ..] ] | (mstI, scaleMsf) <- mstToMsf & Map.toList ] | (scaleI, mstToMsf) <- mstScales sortedMSF & Map.toList ] showSimilarity (s :: Similarity) = showFFloat (Just 2) (s & runProbability & fromRational @Double) "" dotRanges :: range :-> a -> DOT dotRanges rangeTo = do dotComments ["Vertically align range nodes"] let rangeLinks = [ "r" <> BS.intDec srcRI | srcRI <- [1 .. Map.size rangeTo] ] when (1 < List.length rangeLinks) do dotEdges rangeLinks [("weight", "10"), ("style", "invis")] dotNodeCluster :: Int -> Int -> Int -> [(BS.Builder, BS.Builder)] -> DOT dotNodeCluster r t c = dotNode ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c) dotEdgesCluster :: [(Int, Int, Int)] -> [(BS.Builder, BS.Builder)] -> DOT dotEdgesCluster rtc = dotEdges [ ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c) | (r, t, c) <- rtc ] -- Alternative to `Show` to get a more human-readable `String`. class ShowHuman a where showHuman :: a -> String instance ShowHuman (Set.Set Root) where showHuman a = mconcat (List.intersperse " & " (as <&> TS.unpack)) where as = a & Set.toList <&> unNgram . rootLabel instance ShowHuman Int where showHuman = show type DOT = MT.ReaderT BSh.ShortByteString (MT.Writer BS.Builder) () indexFrom1M_ :: Applicative m => [a] -> (a -> Int -> m b) -> m () indexFrom1M_ xs f = zipWithM_ f xs [1 :: Int ..] runDOT :: DOT -> BS.Builder runDOT = MT.execWriter . (`MT.runReaderT` {-indent-} "") dotBlock :: DOT -> DOT dotBlock s = do dotLine "{" () <- MT.withReaderT (" " <>) s dotLine "}" dotLine :: BS.Builder -> DOT dotLine s = do indent <- MT.ask MT.lift $ MT.tell $ BS.shortByteString indent <> s <> "\n" dotComments :: [BS.Builder] -> DOT dotComments cs = do dotLine "/*" forM_ cs dotLine dotLine "*/" dotEdges :: [BS.Builder] -> [(BS.Builder, BS.Builder)] -> DOT dotEdges names as = dotLine $ mconcat (List.intersperse " -> " names) <> builderAttrs as dotNode :: BS.Builder -> [(BS.Builder, BS.Builder)] -> DOT dotNode name as = dotLine $ name <> builderAttrs as builderAttrs :: [(BS.Builder, BS.Builder)] -> BS.Builder builderAttrs as | List.null as = "" | otherwise = "[" <> mconcat (List.intersperse "," [k <> "=" <> v | (k, v) <- as, BS.toLazyByteString k /= ""]) <> "]" builderQuotedString :: String -> BS.Builder builderQuotedString cs = BS.charUtf8 '"' <> foldMap' escape cs <> BS.charUtf8 '"' where escape '\\' = BS.charUtf8 '\\' <> BS.charUtf8 '\\' escape '\"' = BS.charUtf8 '\\' <> BS.charUtf8 '\"' escape c = BS.charUtf8 c