1 {-# LANGUAGE OverloadedStrings #-}
3 module Phylomemy.DOT where
5 import Control.Applicative (Applicative (..))
6 import Control.Monad (Monad (..), foldM_, forM_, mapM_, when, zipWithM_)
7 import Control.Monad.Trans.Class qualified as MT
8 import Control.Monad.Trans.Reader qualified as MT
9 import Control.Monad.Trans.Writer.CPS qualified as MT
10 import Data.Bool (otherwise, (&&))
11 import Data.ByteString.Builder qualified as BS
12 import Data.ByteString.Short qualified as BSh
13 import Data.Eq (Eq (..))
14 import Data.Foldable (foldMap', toList)
15 import Data.Function (id, on, ($), (&), (.))
16 import Data.Functor ((<&>))
18 import Data.List qualified as List
19 import Data.Map.Strict qualified as Map
20 import Data.Maybe (Maybe (..), fromMaybe)
21 import Data.Monoid (Monoid (..))
22 import Data.Ord (Ord (..))
23 import Data.Semigroup (Min (..), Semigroup (..))
25 import Data.Set qualified as Set
26 import Data.String (String)
27 import Data.Text.Lazy qualified as LazyText
28 import Data.Text.Lazy.Encoding qualified as LazyText
29 import Data.Text.Short qualified as TS
30 import Data.Tree qualified as Tree
31 import Debug.Pretty.Simple (pTraceShow)
32 import GHC.Real (floor)
33 import GHC.Stack (HasCallStack)
34 import Numeric (showFFloat)
35 import Numeric.Probability
36 import Text.Pretty.Simple (pShow)
37 import Text.Printf (printf)
38 import Text.Show (Show (..))
41 import Phylomemy.Indexation
42 import Phylomemy.TemporalMatching
45 type ClusterGroup = Int
47 -- | @(`dotMaximalSpanningForest` scaleToMsf)@
48 -- returns a graph of the given `MaximalSpanningForest`
49 -- in [DOT](https://graphviz.org/doc/info/lang.html) format.
51 -- Each maximal spanning tree is here pruned of its edges
52 -- from the lowest `Similarity` it contains, to the highest,
53 -- each level is called a `Scale`, and groups `(range, cluster)` differently,
54 -- hence this actually returns a `Map.Map` of `Scale` to graph.
55 dotMaximalSpanningForest ::
65 MaximalSpanningForest range cluster ->
67 dotMaximalSpanningForest msf =
68 -- pTraceShow ("scaleToRangeToMstToGroupToClusters", Map.findMin scaleToRangeToMstToGroupToClusters) $
69 scaleToRangeToMstToGroupToClusters
70 & Map.mapWithKey \scaleI rangeToMstToGroupToClusters -> runDOT do
71 -- forM_ (similToRangeToMstToGroupToClusters & Map.toList) \(minSimil, rangeToMstToGroupToClusters) -> do
72 dotComments [(BS.lazyByteString $ LazyText.encodeUtf8 $ pShow $ rangeToMstToGroupToClusters & Map.map Map.keys)]
73 -- pTraceShow ("num of nodes", Map.size nodeToBranch, "num of branches", Map.size msf) $
76 dotLine "splines=\"ortho\""
77 indexFrom1M_ (rangeToMstToGroupToClusters & Map.toList) \(srcR, mstToGroupToClusters) srcRI -> do
78 let srcRB = "r" <> BS.intDec srcRI
79 dotLine $ "subgraph cluster_" <> srcRB
81 dotComments ["Create a node for the range " <> srcRB]
85 , ("label", builderQuotedString (showHuman srcR))
88 , ("fillcolor", "gray")
93 dotComments ["Create the cluster nodes within the range " <> srcRB]
94 forM_ (mstToGroupToClusters & Map.toList) \(mstI, groupToClusters) -> do
95 forM_ (groupToClusters & Map.toList) \(srcGroup, srcClusters) -> do
102 , builderQuotedString $
103 (srcClusters & toList <&> showHuman & List.unlines)
105 <> printf "%03d" mstI
108 -- <> {-maybe ""-} (("\n" <>) . showSimilarity) minSimil
110 , ("style", "filled")
111 , -- , minSimil & {-maybe ("", "")-} (\s -> ("fillcolor", 1 + ((floor (runProbability s * 10)) `mod` 10) & BS.intDec))
112 ("colorscheme", "ylorrd9")
115 dotComments ["Horizontally align the cluster nodes within the same range"]
118 | (mstI, groupToClusters) <- mstToGroupToClusters & Map.toList
119 , (group, _clusters) <- groupToClusters & Map.toList
123 c@(firstMst, firstGroup) : cs -> do
125 [srcRB, srcRB <> "t" <> BS.intDec firstMst <> "c" <> BS.intDec firstGroup]
128 cs & (`foldM_` c) \(srcMst, srcGroup) dst@(dstMst, dstGroup) -> do
130 [(srcRI, srcMst, srcGroup), (srcRI, dstMst, dstGroup)]
135 indexFrom1M_ sortedMSF \mst mstI -> do
136 dotComments ["Create the edges of the MST " <> BS.intDec mstI]
137 -- pTraceShowM (mstI, List.length (Tree.flatten mst))
138 let loop (Tree.Node MSTNode{mstNodeRangeCluster = src} dsts) = do
139 forM_ dsts \dstNode@(Tree.Node MSTNode{mstNodeRangeCluster = dst, mstNodeSimilarity = simil} _) -> do
140 -- let similB = BS.stringUtf8 $ showFFloat (Just 2) (simil & runProbability & fromRational @Double) ""
141 let indexRangeCluster (r, c) =
142 let clusterToGroup :: cluster :-> ClusterGroup =
145 | (group, clusters) <-
146 rangeToMstToGroupToClusters
148 & fromMaybe Map.empty
150 & fromMaybe Map.empty
152 , cluster <- clusters & Set.toList
154 in ( 1 + Map.findIndex r rangeToMstToGroupToClusters
156 , Map.lookup c clusterToGroup
157 & fromMaybe (error (LazyText.unpack (pShow ("r", r, "c", c {-, "clusterToGroup", clusterToGroup, "rangeToMstToGroupToClusters", rangeToMstToGroupToClusters-}))))
160 [ indexRangeCluster src
161 , indexRangeCluster dst
163 [ ("constraint", "false")
164 , ("color", (floor (runProbability simil * 10)) `mod` 10 & BS.intDec)
165 , ("colorscheme", "ylorrd9")
166 , -- , ("label", similB)
167 ("fontcolor", "blue")
169 , ("arrowhead", "dot")
170 , ("arrowtail", "dot")
174 dotRanges rangeToMstToGroupToClusters
176 -- TODO: improve this, this is just a quick attempt to stabilize the DOT
177 sortedMSF = msf & List.sortBy (compare `on` mstNodeRangeCluster . Tree.rootLabel)
178 -- Deep remapping of the `msf` to something more suitable for generating the DOT
179 scaleToRangeToMstToGroupToClusters ::
180 {-scale-} Int :-> range :-> MST :-> ClusterGroup :-> Set cluster =
181 let merge = Map.unionWith (Map.unionWith (Map.unionWith Set.union))
188 [ Map.fromListWith merge $
190 then pTraceShow (["scaleToRangeToMstToGroupToClusters"], "scaleI", scaleI, "scaleSimil", scaleSimil, "range", range, "mstI", mstI, "clusterGroup", clusterGroup, "cluster", cluster)
193 Map.singleton range $
195 Map.singleton clusterGroup $
196 Set.singleton cluster
197 | MSTNode{mstNodeRangeCluster = (range, cluster)} <- scaleMST & toList
199 | (clusterGroup, scaleMST) <-
200 -- (if mstI == 2 then pTraceShow ("scaleI", scaleI, "scaleMSF", scaleMSF) else id) $
201 scaleMsf & List.zip [1 :: ClusterGroup ..]
203 | (mstI, scaleMsf) <- mstToMsf & Map.toList
205 | (scaleI, mstToMsf) <- mstScales sortedMSF & Map.toList
207 showSimilarity (s :: Similarity) = showFFloat (Just 2) (s & runProbability & fromRational @Double) ""
209 dotRanges :: range :-> a -> DOT
210 dotRanges rangeTo = do
211 dotComments ["Vertically align range nodes"]
213 [ "r" <> BS.intDec srcRI
214 | srcRI <- [1 .. Map.size rangeTo]
216 when (1 < List.length rangeLinks) do
217 dotEdges rangeLinks [("weight", "10"), ("style", "invis")]
219 dotNodeCluster :: Int -> Int -> Int -> [(BS.Builder, BS.Builder)] -> DOT
220 dotNodeCluster r t c = dotNode ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c)
222 dotEdgesCluster :: [(Int, Int, Int)] -> [(BS.Builder, BS.Builder)] -> DOT
223 dotEdgesCluster rtc =
225 [ ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c)
229 -- Alternative to `Show` to get a more human-readable `String`.
230 class ShowHuman a where
231 showHuman :: a -> String
232 instance ShowHuman (Set.Set Root) where
234 mconcat (List.intersperse " & " (as <&> TS.unpack))
236 as = a & Set.toList <&> unNgram . rootLabel
237 instance ShowHuman Int where
240 type DOT = MT.ReaderT BSh.ShortByteString (MT.Writer BS.Builder) ()
242 indexFrom1M_ :: Applicative m => [a] -> (a -> Int -> m b) -> m ()
243 indexFrom1M_ xs f = zipWithM_ f xs [1 :: Int ..]
245 runDOT :: DOT -> BS.Builder
246 runDOT = MT.execWriter . (`MT.runReaderT` {-indent-} "")
248 dotBlock :: DOT -> DOT
251 () <- MT.withReaderT (" " <>) s
254 dotLine :: BS.Builder -> DOT
257 MT.lift $ MT.tell $ BS.shortByteString indent <> s <> "\n"
259 dotComments :: [BS.Builder] -> DOT
265 dotEdges :: [BS.Builder] -> [(BS.Builder, BS.Builder)] -> DOT
266 dotEdges names as = dotLine $ mconcat (List.intersperse " -> " names) <> builderAttrs as
268 dotNode :: BS.Builder -> [(BS.Builder, BS.Builder)] -> DOT
269 dotNode name as = dotLine $ name <> builderAttrs as
271 builderAttrs :: [(BS.Builder, BS.Builder)] -> BS.Builder
274 | otherwise = "[" <> mconcat (List.intersperse "," [k <> "=" <> v | (k, v) <- as, BS.toLazyByteString k /= ""]) <> "]"
276 builderQuotedString :: String -> BS.Builder
277 builderQuotedString cs = BS.charUtf8 '"' <> foldMap' escape cs <> BS.charUtf8 '"'
279 escape '\\' = BS.charUtf8 '\\' <> BS.charUtf8 '\\'
280 escape '\"' = BS.charUtf8 '\\' <> BS.charUtf8 '\"'
281 escape c = BS.charUtf8 c