1 {-# LANGUAGE OverloadedStrings #-}
3 module Phylomemy.DOT where
5 import Control.Applicative (Applicative (..))
6 import Control.Monad (Monad (..), foldM_, forM_, mapM_, when, zipWithM_)
7 import Control.Monad.Trans.Class qualified as MT
8 import Control.Monad.Trans.Reader qualified as MT
9 import Control.Monad.Trans.Writer.CPS qualified as MT
10 import Data.Bool (otherwise)
11 import Data.ByteString.Builder qualified as BS
12 import Data.ByteString.Short qualified as BSh
13 import Data.Eq (Eq (..))
14 import Data.Foldable (foldMap', toList)
15 import Data.Function (on, ($), (&), (.))
16 import Data.Functor ((<&>))
18 import Data.List qualified as List
19 import Data.Map.Strict qualified as Map
20 import Data.Maybe (Maybe (..), maybe)
21 import Data.Monoid (Monoid (..))
22 import Data.Ord (Ord (..))
23 import Data.Semigroup (Semigroup (..))
25 import Data.Set qualified as Set
26 import Data.String (String)
27 import Data.Text.Short qualified as TS
28 import GHC.Real (floor)
29 import Data.Tuple (snd)
30 import Numeric (showFFloat)
32 -- import Debug.Pretty.Simple (pTraceShow, pTraceShowM)
34 import Data.Tree qualified as Tree
35 import Numeric.Probability
36 import Text.Printf (printf)
37 import Text.Show (Show (..))
40 import Phylomemy.Indexation
41 import Phylomemy.TemporalMatching
43 -- | @(`dotSimilarities` phy)@
44 -- returns a graph of the given `Phylomemy` in [DOT](https://graphviz.org/doc/info/lang.html) format.
46 -- TODO: order clusters of a range by their similarity
55 AllSimilarities range cluster ->
56 MaximalSpanningTrees range cluster ->
58 dotSimilarities srcRCdstRSC msf = runDOT do
59 let sortedMSF = msf & List.sortBy (compare `on` (\(Tree.Node n _) -> mstNodeRangeCluster n))
60 let rangeToMSTToClusters :: range :-> {-mstI-} Int :-> (Maybe Similarity, Set cluster) =
61 let merge = Map.unionWith \(minSimil, x) (_minSimil, y) -> (minSimil, Set.union x y)
64 [ Map.fromListWith merge $
66 Tree.Node MSTNode{mstNodeRangeCluster = (rootR, rootC)} ts ->
67 (rootR, Map.singleton mstI (minSimil, Set.singleton rootC))
68 : [ (range, Map.singleton mstI (minSimil, Set.singleton cluster))
69 | MSTNode{mstNodeRangeCluster = (range, cluster)} <- ts & List.concatMap toList
71 | (mstI, mst) <- sortedMSF & List.zip [1 :: Int ..]
72 , let minSimil = minimumSimilarity mst
74 let showSimilarity (s :: Similarity) = showFFloat (Just 2) (s & runProbability & fromRational @Double) ""
75 dotComments [(BS.stringUtf8 $ show $ rangeToMSTToClusters & Map.map Map.keys)]
76 -- pTraceShow ("num of nodes", Map.size nodeToBranch, "num of branches", Map.size msf) $
79 dotLine "splines=\"ortho\""
80 indexFrom1M_ (rangeToMSTToClusters & Map.toList) \(srcR, mstToClusters) srcRI -> do
81 let srcRB = "r" <> BS.intDec srcRI
82 dotLine $ "subgraph cluster_" <> srcRB
84 dotComments ["Create a node for the range " <> srcRB]
88 , ("label", builderQuotedString (string srcR))
91 , ("fillcolor", "gray")
96 dotComments ["Create the cluster nodes within the range " <> srcRB]
97 forM_ (mstToClusters & Map.toList) \(mstI, (minSimil, clusters)) -> do
98 indexFrom1M_ (clusters & toList) \srcC srcCI -> do
103 [ ("label", builderQuotedString $ string srcC <> "\nT" <> printf "%03d" mstI <> maybe "" (("\n" <>) . showSimilarity) minSimil)
104 , ("style", "filled")
105 , minSimil & maybe ("", "") (\s -> ("fillcolor", 1 + ((floor (runProbability s * 10)) `mod` 10) & BS.intDec))
106 , ("colorscheme", "ylorrd9")
109 dotComments ["Horizontally align the cluster nodes within the same range"]
112 | (mstI, (_minSimil, clusters)) <- mstToClusters & Map.toList
113 , clusterI <- [1 .. Set.size clusters]
117 c@(firstTI, firstCI) : cs -> do
119 [srcRB, srcRB <> "t" <> BS.intDec firstTI <> "c" <> BS.intDec firstCI]
122 cs & (`foldM_` c) \(srcTI, srcCI) dst@(dstTI, dstCI) -> do
124 [(srcRI, srcTI, srcCI), (srcRI, dstTI, dstCI)]
131 (srcRCdstRSC & Map.toList)
132 \(srcR, srcCdstRSC) srcRI -> do
133 let srcRB = "r" <> BS.intDec srcRI
134 let clusterToBranch = rangeToClusterToBranch Map.! srcR
135 dotLine $ "subgraph cluster_" <> srcRB
137 dotComments ["Create a node for the range " <> srcRB]
141 , ("label", builderQuotedString (string srcR))
143 , ("style", "filled")
144 , ("fillcolor", "gray")
149 dotComments ["Create the cluster nodes within the range " <> srcRB]
151 (srcCdstRSC & Map.toList)
152 \(srcC, _dstRSC) srcCI -> do
153 let (srcBI, srcBS) = clusterToBranch Map.! srcC
154 let showSimilarity s = showFFloat (Just 2) (s & runProbability & fromRational @Double) ""
155 dotNodeCluster srcRI srcCI
156 [ ("label", builderQuotedString $ string srcC <> "\nT" <> printf "%03d" srcBI <> maybe "" (("\n" <>) . showSimilarity) srcBS)
157 , ("style", "filled")
158 , srcBS & maybe ("", "") (\s -> ("fillcolor", (floor (runProbability s * 10)) `mod` 10 & BS.intDec))
159 , ("colorscheme", "ylorrd9")
162 dotComments ["Horizontally align the cluster nodes within the same range"]
163 when (1 <= Map.size srcCdstRSC) do
165 [srcRB, srcRB <> "c" <> BS.intDec 1]
167 forM_ (List.zip [1 .. Map.size srcCdstRSC - 1] [2 .. Map.size srcCdstRSC]) \(srcCI, dstCI) ->
168 dotEdgesCluster [(srcRI, dstTI, srcCI), (srcRI, dstTI, dstCI)]
173 indexFrom1M_ sortedMSF \mst mstI -> do
174 dotComments [ "Create the edges of the MST " <> BS.intDec mstI ]
175 --pTraceShowM (mstI, List.length (Tree.flatten mst))
176 let loop (Tree.Node MSTNode{mstNodeRangeCluster=src} dsts) = do
177 forM_ dsts \dstNode@(Tree.Node MSTNode{mstNodeRangeCluster=dst, mstNodeSimilarity=simil} _) -> do
178 let similB = BS.stringUtf8 $ showFFloat (Just 2) (simil & runProbability & fromRational @Double) ""
179 let indexRangeCluster (r,c) =
180 ( 1 + Map.findIndex r rangeToMSTToClusters
182 , 1 + Set.findIndex c (rangeToMSTToClusters Map.! r Map.! mstI & snd)
185 [ indexRangeCluster src
186 , indexRangeCluster dst
188 [ ("constraint", "false")
189 , ("color", (floor (runProbability simil * 10)) `mod` 10 & BS.intDec)
190 , ("colorscheme", "ylorrd9")
191 --, ("label", similB)
192 , ("fontcolor", "blue")
194 , ("arrowhead", "dot")
195 , ("arrowtail", "dot")
201 (srcCdstRSC & Map.toList)
202 \(srcC, dstRSC) srcCI -> do
203 -- TODO: print only the closest downstream/upstream edge of the branch
204 forM_ [directionDownstream] \stream ->
205 forM_ (stream dstRSC & Map.toList) \(dstR, dstSC) -> do
206 let dstRI = 1 + Map.findIndex dstR srcRCdstRSC
207 let (srcBI, srcBS) = clusterToBranch Map.! srcC
208 forM_ (dstSC & Map.toList) \(simil, dstCs) -> do
209 when (srcBS <= simil) do
210 forM_ dstCs \dstC -> do
211 let (dstBI, dstBS) = rangeToClusterToBranch Map.! dstR Map.! dstC
212 when (srcBI == dstBI) do
213 let dstCI = 1 + Map.findIndex dstC (srcRCdstRSC Map.! dstR)
214 let srcRCB = mconcat [srcRB, "c", BS.intDec srcCI]
215 let dstRCB = mconcat ["r", BS.intDec dstRI, "c", BS.intDec dstCI]
216 let similB = showFFloat @Double (Just 2) (fromRational (runProbability simil)) ""
219 [ ("weight", BS.stringUtf8 similB)
220 , ("label", builderQuotedString $ similB)
221 , ("fontcolor", "gray60")
222 , ("constraint", "false")
225 dotRanges srcRCdstRSC
227 dotRanges :: range :-> a -> DOT
228 dotRanges rangeTo = do
229 dotComments ["Vertically align range nodes"]
231 [ "r" <> BS.intDec srcRI
232 | srcRI <- [1 .. Map.size rangeTo]
234 when (1 < List.length rangeLinks) do
235 dotEdges rangeLinks [("weight", "10"), ("style", "invis")]
237 dotNodeCluster :: Int -> Int -> Int -> [(BS.Builder, BS.Builder)] -> DOT
238 dotNodeCluster r t c = dotNode ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c)
240 dotEdgesCluster :: [(Int, Int, Int)] -> [(BS.Builder, BS.Builder)] -> DOT
241 dotEdgesCluster rtc =
243 [ ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c)
248 dotMaximalSpanningTrees ::
249 forall range cluster.
255 Stringable cluster =>
256 MaximalSpanningTrees range cluster ->
258 dotMaximalSpanningTrees msf = runDOT do
260 let rangeToClusters :: range :-> Set cluster =
261 Map.unionsWith Set.union
262 [ Map.fromListWith Set.union
263 [ (range, Set.singleton cluster)
264 | MSTNode{mstNodeRangeCluster=(range, cluster)} <- mst & toList
269 dotLine "splines=\"ortho\""
270 indexFrom1M_ (rangeToClusters & Map.toList) \(srcR, srcCs) srcRI -> do
271 let srcRB = "r" <> BS.intDec srcRI
272 dotLine $ "subgraph cluster_" <> srcRB
274 dotComments ["Create a node for the range " <> srcRB]
278 , ("label", builderQuotedString (string srcR))
280 , ("style", "filled")
281 , ("fillcolor", "gray")
286 dotComments ["Create the cluster nodes within the range " <> srcRB]
287 indexFrom1M_ (srcCs & toList) \srcC srcCI -> do
288 dotNodeCluster srcRI srcCI
289 [ ("label", builderQuotedString (string srcC))
291 dotComments ["Horizontally align the cluster nodes within the same range"]
292 when (1 <= Set.size srcCs) do
294 [srcRB, srcRB <> "c" <> BS.intDec 1]
297 forM_ (List.zip [1 .. Set.size srcCs - 1] [2 .. Set.size srcCs]) \(srcCI, dstCI) ->
298 dotEdgesCluster [(srcRI, srcCI), (srcRI, dstCI)]
302 dotRanges rangeToClusters
303 dotComments ["Create the inter-range edges between clusters"]
304 indexFrom1M_ msf \mst _mstI ->
305 let loop (Tree.Node MSTNode{mstNodeRangeCluster=src} dsts) = do
306 forM_ dsts \dstTree@(Tree.Node MSTNode{mstNodeRangeCluster=dst, mstNodeSimilarity=simil} _) -> do
307 let similB = BS.doubleDec (fromRational (runProbability simil))
308 let indexRangeCluster (r,c) =
309 ( 1 + Map.findIndex r rangeToClusters
310 , 1 + Set.findIndex c (rangeToClusters Map.! r)
313 [ indexRangeCluster src
314 , indexRangeCluster dst
316 [ ("constraint", "false")
319 , ("fontcolor", "blue")
325 -- nodeBranchIndex ::
328 -- MaximalSpanningTrees range cluster ->
329 -- (range, cluster) ->
331 -- nodeBranchIndex rootToMST node =
338 -- (\(src, dsts) -> src == node || List.elem node dsts)
339 -- (links & Map.toList)
343 -- (rootToMST & Map.toList)
345 class Stringable a where
346 string :: a -> String
347 instance Stringable (Set.Set Root) where
349 mconcat (List.intersperse " & " (as <&> TS.unpack))
351 as = a & Set.toList <&> unNgram . rootLabel
352 instance Stringable Int where
355 type DOT = MT.ReaderT BSh.ShortByteString (MT.Writer BS.Builder) ()
357 indexFrom1M_ :: Applicative m => [a] -> (a -> Int -> m b) -> m ()
358 indexFrom1M_ xs f = zipWithM_ f xs [1 :: Int ..]
360 runDOT :: DOT -> BS.Builder
361 runDOT = MT.execWriter . (`MT.runReaderT` {-indent-} "")
363 dotBlock :: DOT -> DOT
366 () <- MT.withReaderT (" " <>) s
369 dotLine :: BS.Builder -> DOT
372 MT.lift $ MT.tell $ BS.shortByteString indent <> s <> "\n"
374 dotComments :: [BS.Builder] -> DOT
375 dotComments = mapM_ \c -> dotLine $ "// " <> c
377 dotEdges :: [BS.Builder] -> [(BS.Builder, BS.Builder)] -> DOT
378 dotEdges names as = dotLine $ mconcat (List.intersperse " -> " names) <> builderAttrs as
380 dotNode :: BS.Builder -> [(BS.Builder, BS.Builder)] -> DOT
381 dotNode name as = dotLine $ name <> builderAttrs as
383 builderAttrs :: [(BS.Builder, BS.Builder)] -> BS.Builder
386 | otherwise = "[" <> mconcat (List.intersperse "," [k <> "=" <> v | (k, v) <- as, BS.toLazyByteString k /= ""]) <> "]"
388 builderQuotedString :: String -> BS.Builder
389 builderQuotedString cs = BS.charUtf8 '"' <> foldMap' escape cs <> BS.charUtf8 '"'
391 escape '\\' = BS.charUtf8 '\\' <> BS.charUtf8 '\\'
392 escape '\"' = BS.charUtf8 '\\' <> BS.charUtf8 '\"'
393 escape c = BS.charUtf8 c