]> Git — Sourcephile - literate-phylomemy.git/blob - src/Phylomemy/DOT.hs
init
[literate-phylomemy.git] / src / Phylomemy / DOT.hs
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Phylomemy.DOT where
4
5 import Control.Applicative (Applicative (..))
6 import Control.Monad (Monad (..), foldM_, forM_, mapM_, when, zipWithM_)
7 import Control.Monad.Trans.Class qualified as MT
8 import Control.Monad.Trans.Reader qualified as MT
9 import Control.Monad.Trans.Writer.CPS qualified as MT
10 import Data.Bool (otherwise)
11 import Data.ByteString.Builder qualified as BS
12 import Data.ByteString.Short qualified as BSh
13 import Data.Eq (Eq (..))
14 import Data.Foldable (foldMap', toList)
15 import Data.Function (on, ($), (&), (.))
16 import Data.Functor ((<&>))
17 import Data.Int (Int)
18 import Data.List qualified as List
19 import Data.Map.Strict qualified as Map
20 import Data.Maybe (Maybe (..), maybe)
21 import Data.Monoid (Monoid (..))
22 import Data.Ord (Ord (..))
23 import Data.Semigroup (Semigroup (..))
24 import Data.Set (Set)
25 import Data.Set qualified as Set
26 import Data.String (String)
27 import Data.Text.Short qualified as TS
28 import GHC.Real (floor)
29 import Data.Tuple (snd)
30 import Numeric (showFFloat)
31
32 -- import Debug.Pretty.Simple (pTraceShow, pTraceShowM)
33
34 import Data.Tree qualified as Tree
35 import Numeric.Probability
36 import Text.Printf (printf)
37 import Text.Show (Show (..))
38 import Prelude
39
40 import Phylomemy.Indexation
41 import Phylomemy.TemporalMatching
42
43 -- | @(`dotSimilarities` phy)@
44 -- returns a graph of the given `Phylomemy` in [DOT](https://graphviz.org/doc/info/lang.html) format.
45 --
46 -- TODO: order clusters of a range by their similarity
47 dotSimilarities ::
48 forall range cluster.
49 Show range =>
50 Show cluster =>
51 Ord range =>
52 Ord cluster =>
53 Stringable range =>
54 Stringable cluster =>
55 AllSimilarities range cluster ->
56 MaximalSpanningTrees range cluster ->
57 BS.Builder
58 dotSimilarities srcRCdstRSC msf = runDOT do
59 let sortedMSF = msf & List.sortBy (compare `on` (\(Tree.Node n _) -> mstNodeRangeCluster n))
60 let rangeToMSTToClusters :: range :-> {-mstI-} Int :-> (Maybe Similarity, Set cluster) =
61 let merge = Map.unionWith \(minSimil, x) (_minSimil, y) -> (minSimil, Set.union x y)
62 in Map.unionsWith
63 merge
64 [ Map.fromListWith merge $
65 case mst of
66 Tree.Node MSTNode{mstNodeRangeCluster = (rootR, rootC)} ts ->
67 (rootR, Map.singleton mstI (minSimil, Set.singleton rootC))
68 : [ (range, Map.singleton mstI (minSimil, Set.singleton cluster))
69 | MSTNode{mstNodeRangeCluster = (range, cluster)} <- ts & List.concatMap toList
70 ]
71 | (mstI, mst) <- sortedMSF & List.zip [1 :: Int ..]
72 , let minSimil = minimumSimilarity mst
73 ]
74 let showSimilarity (s :: Similarity) = showFFloat (Just 2) (s & runProbability & fromRational @Double) ""
75 dotComments [(BS.stringUtf8 $ show $ rangeToMSTToClusters & Map.map Map.keys)]
76 -- pTraceShow ("num of nodes", Map.size nodeToBranch, "num of branches", Map.size msf) $
77 dotLine "digraph g"
78 dotBlock do
79 dotLine "splines=\"ortho\""
80 indexFrom1M_ (rangeToMSTToClusters & Map.toList) \(srcR, mstToClusters) srcRI -> do
81 let srcRB = "r" <> BS.intDec srcRI
82 dotLine $ "subgraph cluster_" <> srcRB
83 dotBlock do
84 dotComments ["Create a node for the range " <> srcRB]
85 dotNode
86 srcRB
87 [ ("shape", "box")
88 , ("label", builderQuotedString (string srcR))
89 , ("color", "gray")
90 , ("style", "filled")
91 , ("fillcolor", "gray")
92 ]
93 dotLine "color=gray"
94 dotBlock do
95 dotLine "rank=same"
96 dotComments ["Create the cluster nodes within the range " <> srcRB]
97 forM_ (mstToClusters & Map.toList) \(mstI, (minSimil, clusters)) -> do
98 indexFrom1M_ (clusters & toList) \srcC srcCI -> do
99 dotNodeCluster
100 srcRI
101 mstI
102 srcCI
103 [ ("label", builderQuotedString $ string srcC <> "\nT" <> printf "%03d" mstI <> maybe "" (("\n" <>) . showSimilarity) minSimil)
104 , ("style", "filled")
105 , minSimil & maybe ("", "") (\s -> ("fillcolor", 1 + ((floor (runProbability s * 10)) `mod` 10) & BS.intDec))
106 , ("colorscheme", "ylorrd9")
107 , ("shape", "box")
108 ]
109 dotComments ["Horizontally align the cluster nodes within the same range"]
110 let row =
111 [ (mstI, clusterI)
112 | (mstI, (_minSimil, clusters)) <- mstToClusters & Map.toList
113 , clusterI <- [1 .. Set.size clusters]
114 ]
115 case row of
116 [] -> return ()
117 c@(firstTI, firstCI) : cs -> do
118 dotEdges
119 [srcRB, srcRB <> "t" <> BS.intDec firstTI <> "c" <> BS.intDec firstCI]
120 [ ("style", "invis")
121 ]
122 cs & (`foldM_` c) \(srcTI, srcCI) dst@(dstTI, dstCI) -> do
123 dotEdgesCluster
124 [(srcRI, srcTI, srcCI), (srcRI, dstTI, dstCI)]
125 [ ("weight", "10")
126 , ("style", "invis")
127 ]
128 return dst
129 {-
130 indexFrom1M_
131 (srcRCdstRSC & Map.toList)
132 \(srcR, srcCdstRSC) srcRI -> do
133 let srcRB = "r" <> BS.intDec srcRI
134 let clusterToBranch = rangeToClusterToBranch Map.! srcR
135 dotLine $ "subgraph cluster_" <> srcRB
136 dotBlock do
137 dotComments ["Create a node for the range " <> srcRB]
138 dotNode
139 srcRB
140 [ ("shape", "box")
141 , ("label", builderQuotedString (string srcR))
142 , ("color", "gray")
143 , ("style", "filled")
144 , ("fillcolor", "gray")
145 ]
146 dotLine "color=gray"
147 dotBlock do
148 dotLine "rank=same"
149 dotComments ["Create the cluster nodes within the range " <> srcRB]
150 indexFrom1M_
151 (srcCdstRSC & Map.toList)
152 \(srcC, _dstRSC) srcCI -> do
153 let (srcBI, srcBS) = clusterToBranch Map.! srcC
154 let showSimilarity s = showFFloat (Just 2) (s & runProbability & fromRational @Double) ""
155 dotNodeCluster srcRI srcCI
156 [ ("label", builderQuotedString $ string srcC <> "\nT" <> printf "%03d" srcBI <> maybe "" (("\n" <>) . showSimilarity) srcBS)
157 , ("style", "filled")
158 , srcBS & maybe ("", "") (\s -> ("fillcolor", (floor (runProbability s * 10)) `mod` 10 & BS.intDec))
159 , ("colorscheme", "ylorrd9")
160 , ("shape", "box")
161 ]
162 dotComments ["Horizontally align the cluster nodes within the same range"]
163 when (1 <= Map.size srcCdstRSC) do
164 dotEdges
165 [srcRB, srcRB <> "c" <> BS.intDec 1]
166 [("style", "invis")]
167 forM_ (List.zip [1 .. Map.size srcCdstRSC - 1] [2 .. Map.size srcCdstRSC]) \(srcCI, dstCI) ->
168 dotEdgesCluster [(srcRI, dstTI, srcCI), (srcRI, dstTI, dstCI)]
169 [ ("weight", "10")
170 , ("style", "invis")
171 ]
172 -}
173 indexFrom1M_ sortedMSF \mst mstI -> do
174 dotComments [ "Create the edges of the MST " <> BS.intDec mstI ]
175 --pTraceShowM (mstI, List.length (Tree.flatten mst))
176 let loop (Tree.Node MSTNode{mstNodeRangeCluster=src} dsts) = do
177 forM_ dsts \dstNode@(Tree.Node MSTNode{mstNodeRangeCluster=dst, mstNodeSimilarity=simil} _) -> do
178 let similB = BS.stringUtf8 $ showFFloat (Just 2) (simil & runProbability & fromRational @Double) ""
179 let indexRangeCluster (r,c) =
180 ( 1 + Map.findIndex r rangeToMSTToClusters
181 , mstI
182 , 1 + Set.findIndex c (rangeToMSTToClusters Map.! r Map.! mstI & snd)
183 )
184 dotEdgesCluster
185 [ indexRangeCluster src
186 , indexRangeCluster dst
187 ]
188 [ ("constraint", "false")
189 , ("color", (floor (runProbability simil * 10)) `mod` 10 & BS.intDec)
190 , ("colorscheme", "ylorrd9")
191 --, ("label", similB)
192 , ("fontcolor", "blue")
193 , ("dir", "both")
194 , ("arrowhead", "dot")
195 , ("arrowtail", "dot")
196 ]
197 loop dstNode
198 loop mst
199 {-
200 indexFrom1M_
201 (srcCdstRSC & Map.toList)
202 \(srcC, dstRSC) srcCI -> do
203 -- TODO: print only the closest downstream/upstream edge of the branch
204 forM_ [directionDownstream] \stream ->
205 forM_ (stream dstRSC & Map.toList) \(dstR, dstSC) -> do
206 let dstRI = 1 + Map.findIndex dstR srcRCdstRSC
207 let (srcBI, srcBS) = clusterToBranch Map.! srcC
208 forM_ (dstSC & Map.toList) \(simil, dstCs) -> do
209 when (srcBS <= simil) do
210 forM_ dstCs \dstC -> do
211 let (dstBI, dstBS) = rangeToClusterToBranch Map.! dstR Map.! dstC
212 when (srcBI == dstBI) do
213 let dstCI = 1 + Map.findIndex dstC (srcRCdstRSC Map.! dstR)
214 let srcRCB = mconcat [srcRB, "c", BS.intDec srcCI]
215 let dstRCB = mconcat ["r", BS.intDec dstRI, "c", BS.intDec dstCI]
216 let similB = showFFloat @Double (Just 2) (fromRational (runProbability simil)) ""
217 dotEdges
218 [srcRCB, dstRCB]
219 [ ("weight", BS.stringUtf8 similB)
220 , ("label", builderQuotedString $ similB)
221 , ("fontcolor", "gray60")
222 , ("constraint", "false")
223 ]
224 -}
225 dotRanges srcRCdstRSC
226
227 dotRanges :: range :-> a -> DOT
228 dotRanges rangeTo = do
229 dotComments ["Vertically align range nodes"]
230 let rangeLinks =
231 [ "r" <> BS.intDec srcRI
232 | srcRI <- [1 .. Map.size rangeTo]
233 ]
234 when (1 < List.length rangeLinks) do
235 dotEdges rangeLinks [("weight", "10"), ("style", "invis")]
236
237 dotNodeCluster :: Int -> Int -> Int -> [(BS.Builder, BS.Builder)] -> DOT
238 dotNodeCluster r t c = dotNode ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c)
239
240 dotEdgesCluster :: [(Int, Int, Int)] -> [(BS.Builder, BS.Builder)] -> DOT
241 dotEdgesCluster rtc =
242 dotEdges
243 [ ("r" <> BS.intDec r <> "t" <> BS.intDec t <> "c" <> BS.intDec c)
244 | (r, t, c) <- rtc
245 ]
246
247 {-
248 dotMaximalSpanningTrees ::
249 forall range cluster.
250 Show range =>
251 Show cluster =>
252 Ord range =>
253 Ord cluster =>
254 Stringable range =>
255 Stringable cluster =>
256 MaximalSpanningTrees range cluster ->
257 BS.Builder
258 dotMaximalSpanningTrees msf = runDOT do
259 dotLine "digraph g"
260 let rangeToClusters :: range :-> Set cluster =
261 Map.unionsWith Set.union
262 [ Map.fromListWith Set.union
263 [ (range, Set.singleton cluster)
264 | MSTNode{mstNodeRangeCluster=(range, cluster)} <- mst & toList
265 ]
266 | mst <- msf
267 ]
268 dotBlock do
269 dotLine "splines=\"ortho\""
270 indexFrom1M_ (rangeToClusters & Map.toList) \(srcR, srcCs) srcRI -> do
271 let srcRB = "r" <> BS.intDec srcRI
272 dotLine $ "subgraph cluster_" <> srcRB
273 dotBlock do
274 dotComments ["Create a node for the range " <> srcRB]
275 dotNode
276 srcRB
277 [ ("shape", "box")
278 , ("label", builderQuotedString (string srcR))
279 , ("color", "gray")
280 , ("style", "filled")
281 , ("fillcolor", "gray")
282 ]
283 dotLine "color=gray"
284 dotBlock do
285 dotLine "rank=same"
286 dotComments ["Create the cluster nodes within the range " <> srcRB]
287 indexFrom1M_ (srcCs & toList) \srcC srcCI -> do
288 dotNodeCluster srcRI srcCI
289 [ ("label", builderQuotedString (string srcC))
290 ]
291 dotComments ["Horizontally align the cluster nodes within the same range"]
292 when (1 <= Set.size srcCs) do
293 dotEdges
294 [srcRB, srcRB <> "c" <> BS.intDec 1]
295 [ ("style", "invis")
296 ]
297 forM_ (List.zip [1 .. Set.size srcCs - 1] [2 .. Set.size srcCs]) \(srcCI, dstCI) ->
298 dotEdgesCluster [(srcRI, srcCI), (srcRI, dstCI)]
299 [ ("weight", "10")
300 , ("style", "invis")
301 ]
302 dotRanges rangeToClusters
303 dotComments ["Create the inter-range edges between clusters"]
304 indexFrom1M_ msf \mst _mstI ->
305 let loop (Tree.Node MSTNode{mstNodeRangeCluster=src} dsts) = do
306 forM_ dsts \dstTree@(Tree.Node MSTNode{mstNodeRangeCluster=dst, mstNodeSimilarity=simil} _) -> do
307 let similB = BS.doubleDec (fromRational (runProbability simil))
308 let indexRangeCluster (r,c) =
309 ( 1 + Map.findIndex r rangeToClusters
310 , 1 + Set.findIndex c (rangeToClusters Map.! r)
311 )
312 dotEdgesCluster
313 [ indexRangeCluster src
314 , indexRangeCluster dst
315 ]
316 [ ("constraint", "false")
317 , ("color", "blue")
318 , ("label", similB)
319 , ("fontcolor", "blue")
320 ]
321 loop dstTree
322 in loop mst
323 -}
324
325 -- nodeBranchIndex ::
326 -- Eq range =>
327 -- Eq cluster =>
328 -- MaximalSpanningTrees range cluster ->
329 -- (range, cluster) ->
330 -- Maybe Int
331 -- nodeBranchIndex rootToMST node =
332 -- List.findIndex
333 -- ( \(root, mst) ->
334 -- root == node
335 -- || any
336 -- ( \links ->
337 -- any
338 -- (\(src, dsts) -> src == node || List.elem node dsts)
339 -- (links & Map.toList)
340 -- )
341 -- mst
342 -- )
343 -- (rootToMST & Map.toList)
344
345 class Stringable a where
346 string :: a -> String
347 instance Stringable (Set.Set Root) where
348 string a =
349 mconcat (List.intersperse " & " (as <&> TS.unpack))
350 where
351 as = a & Set.toList <&> unNgram . rootLabel
352 instance Stringable Int where
353 string = show
354
355 type DOT = MT.ReaderT BSh.ShortByteString (MT.Writer BS.Builder) ()
356
357 indexFrom1M_ :: Applicative m => [a] -> (a -> Int -> m b) -> m ()
358 indexFrom1M_ xs f = zipWithM_ f xs [1 :: Int ..]
359
360 runDOT :: DOT -> BS.Builder
361 runDOT = MT.execWriter . (`MT.runReaderT` {-indent-} "")
362
363 dotBlock :: DOT -> DOT
364 dotBlock s = do
365 dotLine "{"
366 () <- MT.withReaderT (" " <>) s
367 dotLine "}"
368
369 dotLine :: BS.Builder -> DOT
370 dotLine s = do
371 indent <- MT.ask
372 MT.lift $ MT.tell $ BS.shortByteString indent <> s <> "\n"
373
374 dotComments :: [BS.Builder] -> DOT
375 dotComments = mapM_ \c -> dotLine $ "// " <> c
376
377 dotEdges :: [BS.Builder] -> [(BS.Builder, BS.Builder)] -> DOT
378 dotEdges names as = dotLine $ mconcat (List.intersperse " -> " names) <> builderAttrs as
379
380 dotNode :: BS.Builder -> [(BS.Builder, BS.Builder)] -> DOT
381 dotNode name as = dotLine $ name <> builderAttrs as
382
383 builderAttrs :: [(BS.Builder, BS.Builder)] -> BS.Builder
384 builderAttrs as
385 | List.null as = ""
386 | otherwise = "[" <> mconcat (List.intersperse "," [k <> "=" <> v | (k, v) <- as, BS.toLazyByteString k /= ""]) <> "]"
387
388 builderQuotedString :: String -> BS.Builder
389 builderQuotedString cs = BS.charUtf8 '"' <> foldMap' escape cs <> BS.charUtf8 '"'
390 where
391 escape '\\' = BS.charUtf8 '\\' <> BS.charUtf8 '\\'
392 escape '\"' = BS.charUtf8 '\\' <> BS.charUtf8 '\"'
393 escape c = BS.charUtf8 c