{-# LANGUAGE OverloadedStrings #-} module Phylomemy.DOT where import Control.Applicative (Applicative (..)) import Control.Monad (Monad (..), foldM_, forM_, mapM_, when, zipWithM_) import Control.Monad.Trans.Class qualified as MT import Control.Monad.Trans.Reader qualified as MT import Control.Monad.Trans.Writer.CPS qualified as MT import Data.Bool (otherwise) import Data.ByteString.Builder qualified as BS import Data.ByteString.Short qualified as BSh import Data.Eq (Eq (..)) import Data.Foldable (foldMap', toList) import Data.Function (on, ($), (&), (.)) import Data.Functor ((<&>)) import Data.Int (Int) import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Maybe (Maybe (..), maybe) import Data.Monoid (Monoid (..)) import Data.Ord (Ord (..)) import Data.Semigroup (Semigroup (..)) import Data.Set (Set) import Data.Set qualified as Set import Data.String (String) import Data.Text.Short qualified as TS import GHC.Real (floor) import Data.Tuple (snd) import Numeric (showFFloat) -- import Debug.Pretty.Simple (pTraceShow, pTraceShowM) import Data.Tree qualified as Tree import Numeric.Probability import Text.Printf (printf) import Text.Show (Show (..)) import Prelude import Phylomemy.Indexation import Phylomemy.TemporalMatching -- | @(`dotSimilarities` phy)@ -- returns a graph of the given `Phylomemy` in [DOT](https://graphviz.org/doc/info/lang.html) format. -- -- TODO: order clusters of a range by their similarity dotSimilarities :: forall range cluster. Show range => Show cluster => Ord range => Ord cluster => Stringable range => Stringable cluster => AllSimilarities range cluster -> MaximalSpanningTrees range cluster -> BS.Builder dotSimilarities srcRCdstRSC msf = runDOT do let sortedMSF = msf & List.sortBy (compare `on` (\(Tree.Node n _) -> mstNodeRangeCluster n)) let rangeToMSTToClusters :: range :-> {-mstI-} Int :-> (Maybe Similarity, Set cluster) = let merge = Map.unionWith \(minSimil, x) (_minSimil, y) -> (minSimil, Set.union x y) in Map.unionsWith merge [ Map.fromListWith merge $ case mst of Tree.Node MSTNode{mstNodeRangeCluster = (rootR, rootC)} ts -> (rootR, Map.singleton mstI (minSimil, Set.singleton rootC)) : [ (range, Map.singleton mstI (minSimil, Set.singleton cluster)) | MSTNode{mstNodeRangeCluster = (range, cluster)} <- ts & List.concatMap toList ] | (mstI, mst) <- sortedMSF & List.zip [1 :: Int ..] , let minSimil = minimumSimilarity mst ] let showSimilarity (s :: Similarity) = showFFloat (Just 2) (s & runProbability & fromRational @Double) "" dotComments [(BS.stringUtf8 $ show $ rangeToMSTToClusters & Map.map Map.keys)] -- pTraceShow ("num of nodes", Map.size nodeToBranch, "num of branches", Map.size msf) $ dotLine "digraph g" dotBlock do dotLine "splines=\"ortho\"" indexFrom1M_ (rangeToMSTToClusters & Map.toList) \(srcR, mstToClusters) srcRI -> do let srcRB = "r" <> BS.intDec srcRI dotLine $ "subgraph cluster_" <> srcRB dotBlock do dotComments ["Create a node for the range " <> srcRB] dotNode srcRB [ ("shape", "box") , ("label", builderQuotedString (string srcR)) , ("color", "gray") , ("style", "filled") , ("fillcolor", "gray") ] dotLine "color=gray" dotBlock do dotLine "rank=same" dotComments ["Create the cluster nodes within the range " <> srcRB] forM_ (mstToClusters & Map.toList) \(mstI, (minSimil, clusters)) -> do indexFrom1M_ (clusters & toList) \srcC srcCI -> do dotNodeCluster srcRI mstI srcCI [ ("label", builderQuotedString $ string srcC <> "\nT" <> printf "%03d" mstI <> maybe "" (("\n" <>) . showSimilarity) minSimil) , ("style", "filled") , minSimil & maybe ("", "") (\s -> ("fillcolor", 1 + ((floor (runProbability s * 10)) `mod` 10) & BS.intDec)) , ("colorscheme", "ylorrd9") , ("shape", "box") ] dotComments ["Horizontally align the cluster nodes within the same range"] let row = [ (mstI, clusterI) | (mstI, (_minSimil, clusters)) <- mstToClusters & Map.toList , clusterI <- [1 .. Set.size clusters] ] case row of [] -> return () c@(firstTI, firstCI) : cs -> do dotEdges [srcRB, srcRB <> "t" <> BS.intDec firstTI <> "c" <> BS.intDec firstCI] [ ("style", "invis") ] cs & (`foldM_` c) \(srcTI, srcCI) dst@(dstTI, dstCI) -> do dotEdgesCluster [(srcRI, srcTI, srcCI), (srcRI, dstTI, dstCI)] [ ("weight", "10") , ("style", "invis") ] return dst {- indexFrom1M_ (srcRCdstRSC & Map.toList) \(srcR, srcCdstRSC) srcRI -> do let srcRB = "r" <> BS.intDec srcRI let clusterToBranch = rangeToClusterToBranch Map.! srcR dotLine $ "subgraph cluster_" <> srcRB dotBlock do dotComments ["Create a node for the range " <> srcRB] dotNode srcRB [ ("shape", "box") , ("label", builderQuotedString (string srcR)) , ("color", "gray") , ("style", "filled") , ("fillcolor", "gray") ] dotLine "color=gray" dotBlock do dotLine "rank=same" dotComments ["Create the cluster nodes within the range " <> srcRB] indexFrom1M_ (srcCdstRSC & Map.toList) \(srcC, _dstRSC) srcCI -> do let (srcBI, srcBS) = clusterToBranch Map.! srcC let showSimilarity s = showFFloat (Just 2) (s & runProbability & fromRational @Double) "" dotNodeCluster srcRI srcCI [ ("label", builderQuotedString $ string srcC <> "\nT" <> printf "%03d" srcBI <> maybe "" (("\n" <>) . showSimilarity) srcBS) , ("style", "filled") , srcBS & maybe ("", "") (\s -> ("fillcolor", (floor (runProbability s * 10)) `mod` 10 & BS.intDec)) , ("colorscheme", "ylorrd9") , ("shape", "box") ] dotComments ["Horizontally align the cluster nodes within the same range"] when (1 <= Map.size srcCdstRSC) do dotEdges [srcRB, srcRB <> "c" <> BS.intDec 1] [("style", "invis")] forM_ (List.zip [1 .. Map.size srcCdstRSC - 1] [2 .. Map.size srcCdstRSC]) \(srcCI, dstCI) -> dotEdgesCluster [(srcRI, dstTI, srcCI), (srcRI, dstTI, dstCI)] [ ("weight", "10") , ("style", "invis") ] -} indexFrom1M_ sortedMSF \mst mstI -> do dotComments [ "Create the edges of the MST " <> BS.intDec mstI ] --pTraceShowM (mstI, List.length (Tree.flatten mst)) let loop (Tree.Node MSTNode{mstNodeRangeCluster=src} dsts) = do forM_ dsts \dstNode@(Tree.Node MSTNode{mstNodeRangeCluster=dst, mstNodeSimilarity=simil} _) -> do let similB = BS.stringUtf8 $ showFFloat (Just 2) (simil & runProbability & fromRational @Double) "" let indexRangeCluster (r,c) = ( 1 + Map.findIndex r rangeToMSTToClusters , mstI , 1 + Set.findIndex c (rangeToMSTToClusters Map.! r Map.! mstI & snd) ) dotEdgesCluster [ indexRangeCluster src , indexRangeCluster dst ] [ ("constraint", "false") , ("color", (floor (runProbability simil * 10)) `mod` 10 & BS.intDec) , ("colorscheme", "ylorrd9") --, ("label", similB) , ("fontcolor", "blue") , ("dir", "both") , ("arrowhead", "dot") , ("arrowtail", "dot") ] loop dstNode loop mst {- indexFrom1M_ (srcCdstRSC & Map.toList) \(srcC, dstRSC) srcCI -> do -- TODO: print only the closest downstream/upstream edge of the branch forM_ [directionDownstream] \stream -> forM_ (stream dstRSC & Map.toList) \(dstR, dstSC) -> do let dstRI = 1 + Map.findIndex dstR srcRCdstRSC let (srcBI, srcBS) = clusterToBranch Map.! srcC forM_ (dstSC & Map.toList) \(simil, dstCs) -> do when (srcBS <= simil) do forM_ dstCs \dstC -> do let (dstBI, dstBS) = rangeToClusterToBranch Map.! dstR Map.! dstC when (srcBI == dstBI) do let dstCI = 1 + Map.findIndex dstC (srcRCdstRSC Map.! dstR) let srcRCB = mconcat [srcRB, "c", BS.intDec srcCI] let dstRCB = mconcat ["r", BS.intDec dstRI, "c", BS.intDec dstCI] let similB = showFFloat @Double (Just 2) (fromRational (runProbability simil)) "" dotEdges [srcRCB, dstRCB] [ ("weight", BS.stringUtf8 similB) , ("label", builderQuotedString $ similB) , ("fontcolor", "gray60") , ("constraint", "false") ] -} dotRanges srcRCdstRSC dotRanges :: range :-> a -> DOT dotRanges rangeTo = do dotComments ["Vertically align range nodes"] let rangeLinks = [ "r" <> BS.intDec srcRI | srcRI <- [1 .. Map.size rangeTo] ] when (1 < List.length rangeLinks) do dotEdges rangeLinks [("weight", "10"), ("style", "invis")] dotNodeCluster :: Int -> Int -> Int -> [(BS.Builder, BS.Builder)] -> DOT dotNodeCluster r t c = dotNode ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c) dotEdgesCluster :: [(Int, Int, Int)] -> [(BS.Builder, BS.Builder)] -> DOT dotEdgesCluster rtc = dotEdges [ ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c) | (r, t, c) <- rtc ] {- dotMaximalSpanningTrees :: forall range cluster. Show range => Show cluster => Ord range => Ord cluster => Stringable range => Stringable cluster => MaximalSpanningTrees range cluster -> BS.Builder dotMaximalSpanningTrees msf = runDOT do dotLine "digraph g" let rangeToClusters :: range :-> Set cluster = Map.unionsWith Set.union [ Map.fromListWith Set.union [ (range, Set.singleton cluster) | MSTNode{mstNodeRangeCluster=(range, cluster)} <- mst & toList ] | mst <- msf ] dotBlock do dotLine "splines=\"ortho\"" indexFrom1M_ (rangeToClusters & Map.toList) \(srcR, srcCs) srcRI -> do let srcRB = "r" <> BS.intDec srcRI dotLine $ "subgraph cluster_" <> srcRB dotBlock do dotComments ["Create a node for the range " <> srcRB] dotNode srcRB [ ("shape", "box") , ("label", builderQuotedString (string srcR)) , ("color", "gray") , ("style", "filled") , ("fillcolor", "gray") ] dotLine "color=gray" dotBlock do dotLine "rank=same" dotComments ["Create the cluster nodes within the range " <> srcRB] indexFrom1M_ (srcCs & toList) \srcC srcCI -> do dotNodeCluster srcRI srcCI [ ("label", builderQuotedString (string srcC)) ] dotComments ["Horizontally align the cluster nodes within the same range"] when (1 <= Set.size srcCs) do dotEdges [srcRB, srcRB <> "c" <> BS.intDec 1] [ ("style", "invis") ] forM_ (List.zip [1 .. Set.size srcCs - 1] [2 .. Set.size srcCs]) \(srcCI, dstCI) -> dotEdgesCluster [(srcRI, srcCI), (srcRI, dstCI)] [ ("weight", "10") , ("style", "invis") ] dotRanges rangeToClusters dotComments ["Create the inter-range edges between clusters"] indexFrom1M_ msf \mst _mstI -> let loop (Tree.Node MSTNode{mstNodeRangeCluster=src} dsts) = do forM_ dsts \dstTree@(Tree.Node MSTNode{mstNodeRangeCluster=dst, mstNodeSimilarity=simil} _) -> do let similB = BS.doubleDec (fromRational (runProbability simil)) let indexRangeCluster (r,c) = ( 1 + Map.findIndex r rangeToClusters , 1 + Set.findIndex c (rangeToClusters Map.! r) ) dotEdgesCluster [ indexRangeCluster src , indexRangeCluster dst ] [ ("constraint", "false") , ("color", "blue") , ("label", similB) , ("fontcolor", "blue") ] loop dstTree in loop mst -} -- nodeBranchIndex :: -- Eq range => -- Eq cluster => -- MaximalSpanningTrees range cluster -> -- (range, cluster) -> -- Maybe Int -- nodeBranchIndex rootToMST node = -- List.findIndex -- ( \(root, mst) -> -- root == node -- || any -- ( \links -> -- any -- (\(src, dsts) -> src == node || List.elem node dsts) -- (links & Map.toList) -- ) -- mst -- ) -- (rootToMST & Map.toList) class Stringable a where string :: a -> String instance Stringable (Set.Set Root) where string a = mconcat (List.intersperse " & " (as <&> TS.unpack)) where as = a & Set.toList <&> unNgram . rootLabel instance Stringable Int where string = show type DOT = MT.ReaderT BSh.ShortByteString (MT.Writer BS.Builder) () indexFrom1M_ :: Applicative m => [a] -> (a -> Int -> m b) -> m () indexFrom1M_ xs f = zipWithM_ f xs [1 :: Int ..] runDOT :: DOT -> BS.Builder runDOT = MT.execWriter . (`MT.runReaderT` {-indent-} "") dotBlock :: DOT -> DOT dotBlock s = do dotLine "{" () <- MT.withReaderT (" " <>) s dotLine "}" dotLine :: BS.Builder -> DOT dotLine s = do indent <- MT.ask MT.lift $ MT.tell $ BS.shortByteString indent <> s <> "\n" dotComments :: [BS.Builder] -> DOT dotComments = mapM_ \c -> dotLine $ "// " <> c dotEdges :: [BS.Builder] -> [(BS.Builder, BS.Builder)] -> DOT dotEdges names as = dotLine $ mconcat (List.intersperse " -> " names) <> builderAttrs as dotNode :: BS.Builder -> [(BS.Builder, BS.Builder)] -> DOT dotNode name as = dotLine $ name <> builderAttrs as builderAttrs :: [(BS.Builder, BS.Builder)] -> BS.Builder builderAttrs as | List.null as = "" | otherwise = "[" <> mconcat (List.intersperse "," [k <> "=" <> v | (k, v) <- as, BS.toLazyByteString k /= ""]) <> "]" builderQuotedString :: String -> BS.Builder builderQuotedString cs = BS.charUtf8 '"' <> foldMap' escape cs <> BS.charUtf8 '"' where escape '\\' = BS.charUtf8 '\\' <> BS.charUtf8 '\\' escape '\"' = BS.charUtf8 '\\' <> BS.charUtf8 '\"' escape c = BS.charUtf8 c