]> Git — Sourcephile - literate-phylomemy.git/blob - src/Phylomemy/DOT.hs
completeness(scale): add support for scale
[literate-phylomemy.git] / src / Phylomemy / DOT.hs
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Phylomemy.DOT where
4
5 import Control.Applicative (Applicative (..))
6 import Control.Monad (Monad (..), foldM_, forM_, mapM_, when, zipWithM_)
7 import Control.Monad.Trans.Class qualified as MT
8 import Control.Monad.Trans.Reader qualified as MT
9 import Control.Monad.Trans.Writer.CPS qualified as MT
10 import Data.Bool (otherwise, (&&))
11 import Data.ByteString.Builder qualified as BS
12 import Data.ByteString.Short qualified as BSh
13 import Data.Eq (Eq (..))
14 import Data.Foldable (foldMap', toList)
15 import Data.Function (id, on, ($), (&), (.))
16 import Data.Functor ((<&>))
17 import Data.Int (Int)
18 import Data.List qualified as List
19 import Data.Map.Strict qualified as Map
20 import Data.Maybe (Maybe (..), fromMaybe)
21 import Data.Monoid (Monoid (..))
22 import Data.Ord (Ord (..))
23 import Data.Semigroup (Min (..), Semigroup (..))
24 import Data.Set (Set)
25 import Data.Set qualified as Set
26 import Data.String (String)
27 import Data.Text.Lazy qualified as LazyText
28 import Data.Text.Lazy.Encoding qualified as LazyText
29 import Data.Text.Short qualified as TS
30 import Data.Tree qualified as Tree
31 import Debug.Pretty.Simple (pTraceShow)
32 import GHC.Real (floor)
33 import GHC.Stack (HasCallStack)
34 import Numeric (showFFloat)
35 import Numeric.Probability
36 import Text.Pretty.Simple (pShow)
37 import Text.Printf (printf)
38 import Text.Show (Show (..))
39 import Prelude
40
41 import Phylomemy.Indexation
42 import Phylomemy.TemporalMatching
43
44 type MST = Int
45 type ClusterGroup = Int
46
47 -- | @(`dotMaximalSpanningForest` scaleToMsf)@
48 -- returns a graph of the given `MaximalSpanningForest`
49 -- in [DOT](https://graphviz.org/doc/info/lang.html) format.
50 --
51 -- Each maximal spanning tree is here pruned of its edges
52 -- from the lowest `Similarity` it contains, to the highest,
53 -- each level is called a `Scale`, and groups `(range, cluster)` differently,
54 -- hence this actually returns a `Map.Map` of `Scale` to graph.
55 dotMaximalSpanningForest ::
56 forall range cluster.
57 HasCallStack =>
58 cluster ~ Cluster =>
59 Show range =>
60 Show cluster =>
61 Ord range =>
62 Ord cluster =>
63 ShowHuman range =>
64 ShowHuman cluster =>
65 MaximalSpanningForest range cluster ->
66 Int :-> BS.Builder
67 dotMaximalSpanningForest msf =
68 -- pTraceShow ("scaleToRangeToMstToGroupToClusters", Map.findMin scaleToRangeToMstToGroupToClusters) $
69 scaleToRangeToMstToGroupToClusters
70 & Map.mapWithKey \scaleI rangeToMstToGroupToClusters -> runDOT do
71 -- forM_ (similToRangeToMstToGroupToClusters & Map.toList) \(minSimil, rangeToMstToGroupToClusters) -> do
72 dotComments [(BS.lazyByteString $ LazyText.encodeUtf8 $ pShow $ rangeToMstToGroupToClusters & Map.map Map.keys)]
73 -- pTraceShow ("num of nodes", Map.size nodeToBranch, "num of branches", Map.size msf) $
74 dotLine "digraph g"
75 dotBlock do
76 dotLine "splines=\"ortho\""
77 indexFrom1M_ (rangeToMstToGroupToClusters & Map.toList) \(srcR, mstToGroupToClusters) srcRI -> do
78 let srcRB = "r" <> BS.intDec srcRI
79 dotLine $ "subgraph cluster_" <> srcRB
80 dotBlock do
81 dotComments ["Create a node for the range " <> srcRB]
82 dotNode
83 srcRB
84 [ ("shape", "box")
85 , ("label", builderQuotedString (showHuman srcR))
86 , ("color", "gray")
87 , ("style", "filled")
88 , ("fillcolor", "gray")
89 ]
90 dotLine "color=gray"
91 dotBlock do
92 dotLine "rank=same"
93 dotComments ["Create the cluster nodes within the range " <> srcRB]
94 forM_ (mstToGroupToClusters & Map.toList) \(mstI, groupToClusters) -> do
95 forM_ (groupToClusters & Map.toList) \(srcGroup, srcClusters) -> do
96 dotNodeCluster
97 srcRI
98 mstI
99 srcGroup
100 [
101 ( "label"
102 , builderQuotedString $
103 (srcClusters & toList <&> showHuman & List.unlines)
104 <> "\nT"
105 <> printf "%03d" mstI
106 <> "\nS"
107 <> show scaleI
108 -- <> {-maybe ""-} (("\n" <>) . showSimilarity) minSimil
109 )
110 , ("style", "filled")
111 , -- , minSimil & {-maybe ("", "")-} (\s -> ("fillcolor", 1 + ((floor (runProbability s * 10)) `mod` 10) & BS.intDec))
112 ("colorscheme", "ylorrd9")
113 , ("shape", "box")
114 ]
115 dotComments ["Horizontally align the cluster nodes within the same range"]
116 let row =
117 [ (mstI, group)
118 | (mstI, groupToClusters) <- mstToGroupToClusters & Map.toList
119 , (group, _clusters) <- groupToClusters & Map.toList
120 ]
121 case row of
122 [] -> return ()
123 c@(firstMst, firstGroup) : cs -> do
124 dotEdges
125 [srcRB, srcRB <> "t" <> BS.intDec firstMst <> "c" <> BS.intDec firstGroup]
126 [ ("style", "invis")
127 ]
128 cs & (`foldM_` c) \(srcMst, srcGroup) dst@(dstMst, dstGroup) -> do
129 dotEdgesCluster
130 [(srcRI, srcMst, srcGroup), (srcRI, dstMst, dstGroup)]
131 [ ("weight", "10")
132 , ("style", "invis")
133 ]
134 return dst
135 indexFrom1M_ sortedMSF \mst mstI -> do
136 dotComments ["Create the edges of the MST " <> BS.intDec mstI]
137 -- pTraceShowM (mstI, List.length (Tree.flatten mst))
138 let loop (Tree.Node MSTNode{mstNodeRangeCluster = src} dsts) = do
139 forM_ dsts \dstNode@(Tree.Node MSTNode{mstNodeRangeCluster = dst, mstNodeSimilarity = simil} _) -> do
140 -- let similB = BS.stringUtf8 $ showFFloat (Just 2) (simil & runProbability & fromRational @Double) ""
141 let indexRangeCluster (r, c) =
142 let clusterToGroup :: cluster :-> ClusterGroup =
143 Map.fromList
144 [ (cluster, group)
145 | (group, clusters) <-
146 rangeToMstToGroupToClusters
147 & Map.lookup r
148 & fromMaybe Map.empty
149 & Map.lookup mstI
150 & fromMaybe Map.empty
151 & Map.toList
152 , cluster <- clusters & Set.toList
153 ]
154 in ( 1 + Map.findIndex r rangeToMstToGroupToClusters
155 , mstI
156 , Map.lookup c clusterToGroup
157 & fromMaybe (error (LazyText.unpack (pShow ("r", r, "c", c {-, "clusterToGroup", clusterToGroup, "rangeToMstToGroupToClusters", rangeToMstToGroupToClusters-}))))
158 )
159 dotEdgesCluster
160 [ indexRangeCluster src
161 , indexRangeCluster dst
162 ]
163 [ ("constraint", "false")
164 , ("color", (floor (runProbability simil * 10)) `mod` 10 & BS.intDec)
165 , ("colorscheme", "ylorrd9")
166 , -- , ("label", similB)
167 ("fontcolor", "blue")
168 , ("dir", "both")
169 , ("arrowhead", "dot")
170 , ("arrowtail", "dot")
171 ]
172 loop dstNode
173 loop mst
174 dotRanges rangeToMstToGroupToClusters
175 where
176 -- TODO: improve this, this is just a quick attempt to stabilize the DOT
177 sortedMSF = msf & List.sortBy (compare `on` mstNodeRangeCluster . Tree.rootLabel)
178 -- Deep remapping of the `msf` to something more suitable for generating the DOT
179 scaleToRangeToMstToGroupToClusters ::
180 {-scale-} Int :-> range :-> MST :-> ClusterGroup :-> Set cluster =
181 let merge = Map.unionWith (Map.unionWith (Map.unionWith Set.union))
182 in Map.unionsWith
183 merge
184 [ Map.unionsWith
185 merge
186 [ Map.unionsWith
187 merge
188 [ Map.fromListWith merge $
189 [ {-(if scaleI == 3
190 then pTraceShow (["scaleToRangeToMstToGroupToClusters"], "scaleI", scaleI, "scaleSimil", scaleSimil, "range", range, "mstI", mstI, "clusterGroup", clusterGroup, "cluster", cluster)
191 else id) $-}
192 (scaleI,) $
193 Map.singleton range $
194 Map.singleton mstI $
195 Map.singleton clusterGroup $
196 Set.singleton cluster
197 | MSTNode{mstNodeRangeCluster = (range, cluster)} <- scaleMST & toList
198 ]
199 | (clusterGroup, scaleMST) <-
200 -- (if mstI == 2 then pTraceShow ("scaleI", scaleI, "scaleMSF", scaleMSF) else id) $
201 scaleMsf & List.zip [1 :: ClusterGroup ..]
202 ]
203 | (mstI, scaleMsf) <- mstToMsf & Map.toList
204 ]
205 | (scaleI, mstToMsf) <- mstScales sortedMSF & Map.toList
206 ]
207 showSimilarity (s :: Similarity) = showFFloat (Just 2) (s & runProbability & fromRational @Double) ""
208
209 dotRanges :: range :-> a -> DOT
210 dotRanges rangeTo = do
211 dotComments ["Vertically align range nodes"]
212 let rangeLinks =
213 [ "r" <> BS.intDec srcRI
214 | srcRI <- [1 .. Map.size rangeTo]
215 ]
216 when (1 < List.length rangeLinks) do
217 dotEdges rangeLinks [("weight", "10"), ("style", "invis")]
218
219 dotNodeCluster :: Int -> Int -> Int -> [(BS.Builder, BS.Builder)] -> DOT
220 dotNodeCluster r t c = dotNode ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c)
221
222 dotEdgesCluster :: [(Int, Int, Int)] -> [(BS.Builder, BS.Builder)] -> DOT
223 dotEdgesCluster rtc =
224 dotEdges
225 [ ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c)
226 | (r, t, c) <- rtc
227 ]
228
229 -- Alternative to `Show` to get a more human-readable `String`.
230 class ShowHuman a where
231 showHuman :: a -> String
232 instance ShowHuman (Set.Set Root) where
233 showHuman a =
234 mconcat (List.intersperse " & " (as <&> TS.unpack))
235 where
236 as = a & Set.toList <&> unNgram . rootLabel
237 instance ShowHuman Int where
238 showHuman = show
239
240 type DOT = MT.ReaderT BSh.ShortByteString (MT.Writer BS.Builder) ()
241
242 indexFrom1M_ :: Applicative m => [a] -> (a -> Int -> m b) -> m ()
243 indexFrom1M_ xs f = zipWithM_ f xs [1 :: Int ..]
244
245 runDOT :: DOT -> BS.Builder
246 runDOT = MT.execWriter . (`MT.runReaderT` {-indent-} "")
247
248 dotBlock :: DOT -> DOT
249 dotBlock s = do
250 dotLine "{"
251 () <- MT.withReaderT (" " <>) s
252 dotLine "}"
253
254 dotLine :: BS.Builder -> DOT
255 dotLine s = do
256 indent <- MT.ask
257 MT.lift $ MT.tell $ BS.shortByteString indent <> s <> "\n"
258
259 dotComments :: [BS.Builder] -> DOT
260 dotComments cs = do
261 dotLine "/*"
262 forM_ cs dotLine
263 dotLine "*/"
264
265 dotEdges :: [BS.Builder] -> [(BS.Builder, BS.Builder)] -> DOT
266 dotEdges names as = dotLine $ mconcat (List.intersperse " -> " names) <> builderAttrs as
267
268 dotNode :: BS.Builder -> [(BS.Builder, BS.Builder)] -> DOT
269 dotNode name as = dotLine $ name <> builderAttrs as
270
271 builderAttrs :: [(BS.Builder, BS.Builder)] -> BS.Builder
272 builderAttrs as
273 | List.null as = ""
274 | otherwise = "[" <> mconcat (List.intersperse "," [k <> "=" <> v | (k, v) <- as, BS.toLazyByteString k /= ""]) <> "]"
275
276 builderQuotedString :: String -> BS.Builder
277 builderQuotedString cs = BS.charUtf8 '"' <> foldMap' escape cs <> BS.charUtf8 '"'
278 where
279 escape '\\' = BS.charUtf8 '\\' <> BS.charUtf8 '\\'
280 escape '\"' = BS.charUtf8 '\\' <> BS.charUtf8 '\"'
281 escape c = BS.charUtf8 c