]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Sync HTML5 rendition of DTC with new TCT parsing.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
21 import Data.Set (Set)
22 import Data.TreeSeq.Strict (Tree(..), tree0)
23 import Data.Tuple (uncurry)
24 import Prelude (Num(..), undefined)
25 import qualified Data.Char as Char
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text.Lazy as TL
29 import qualified Language.TCT.Write.Plain as Plain
30 import qualified System.FilePath as FP
31
32 -- import Language.TCT.Debug
33 import Language.TCT hiding (Parser)
34 import Language.XML
35 import Text.Blaze.XML ()
36
37 -- | Main entry point
38 --
39 -- NOTE: 'XmlNode' are still annotated with 'Cell',
40 -- but nothing is done to preserve any ordering amongst them,
41 -- because 'Node's sometimes need to be reordered
42 -- (eg. about/title may have a title from the section before,
43 -- hence outside of about).
44 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
45 document :: Roots -> XMLs
46 document doc =
47 -- (`S.evalState` def) $
48 case Seq.viewl doc of
49 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
50 let (titles, content) = partitionSection sec in
51 case Seq.viewl titles of
52 (unTree -> Cell bt et _) :< _ ->
53 xmlify def
54 { inh_titles = titles
55 , inh_figure = True
56 } contentWithAbout <>
57 xmlify def foot
58 where
59 contentWithAbout =
60 case Seq.findIndexL isAbout content of
61 Nothing -> Tree (Cell bt et $ NodeHeader $ HeaderColon "about" "") mempty <| content
62 Just{} -> content
63 isAbout = \case
64 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
65 _ -> False
66 _ -> xmlify def doc
67 _ -> xmlify def doc
68
69 partitionSection :: Root -> (Roots, Roots)
70 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
71 case Seq.viewl body of
72 EmptyL -> mempty
73 title@(unTree -> Cell _bt et NodePara) :< rest ->
74 let (subtitles, content) = spanlSubtitles et rest in
75 (title <| (subtitles >>= subTrees), content)
76 where
77 spanlSubtitles ep ts =
78 case Seq.viewl ts of
79 sub@(unTree -> Cell bs es (NodeHeader (HeaderSection lvlSub))) :< rs
80 | lvlSub <= lvlPar
81 , pos_line bs - pos_line ep <= 1 ->
82 let (subs, ts') = spanlSubtitles es rs in
83 (sub <| subs, ts')
84 _ -> (mempty, ts)
85 _ -> (mempty, body)
86 partitionSection _ = mempty
87
88 -- * Type 'Inh'
89 data Inh
90 = Inh
91 { inh_figure :: Bool
92 , inh_para :: [Inh -> Root -> XML]
93 , inh_titles :: Roots
94 }
95 instance Default Inh where
96 def = Inh
97 { inh_figure = False
98 , inh_para = List.repeat elementPara
99 , inh_titles = mempty
100 }
101
102 -- ** 'inh_para'
103 elementPara :: Inh -> Root -> XML
104 elementPara inh (Tree c ts) = Tree (XmlElem "para" <$ c) $ xmlify inh ts
105
106 elementTitle :: Inh -> Root -> XML
107 elementTitle inh (Tree c ts) = Tree (XmlElem "title" <$ c) $ xmlify inh ts
108
109 elementName :: Inh -> Root -> XML
110 elementName inh (Tree c ts) = Tree (XmlElem "name" <$ c) $ xmlify inh ts
111
112 attributeName :: Inh -> Root -> XML
113 attributeName _inh (Tree c ts) = tree0 (XmlAttr "name" (Plain.document ts) <$ c)
114
115 -- * Class 'Xmlify'
116 class Xmlify a where
117 xmlify :: Inh -> a -> XMLs
118 instance Xmlify Roots where
119 xmlify inh roots =
120 case Seq.viewl roots of
121 EmptyL -> mempty
122 r@(Tree cr@(Cell _br _er nr) ts) :< rs ->
123 case nr of
124 ----------------------
125 -- NOTE: HeaderColon becomes parent
126 -- of any continuous following-sibling HeaderBar or HeaderGreat
127 NodeHeader (HeaderColon n _wh)
128 | (span, rest) <- spanlHeaderColon rs
129 , not $ null span ->
130 xmlify inh (Tree cr (ts<>span)) <>
131 xmlify inh rest
132 where
133 spanlHeaderColon :: Roots -> (Roots, Roots)
134 spanlHeaderColon =
135 Seq.spanl $ \case
136 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
137 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
138 _ -> False
139 ----------------------
140 -- NOTE: gather HeaderBrackets
141 NodeHeader HeaderBrackets{}
142 | (span,rest) <- spanlBrackets roots
143 , not (null span) ->
144 (<| xmlify inh rest) $
145 element "references" $
146 span >>= xmlify inh
147 where
148 spanlBrackets :: Roots -> (Roots, Roots)
149 spanlBrackets =
150 Seq.spanl $ \case
151 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
152 _ -> False
153 ----------------------
154 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
155 NodeText x
156 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
157 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
158 ----------------------
159 -- NOTE: detect [some text](http://some.url) or [SomeRef]
160 NodePair PairParen
161 | Tree (Cell bb eb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
162 (<| xmlify inh rs') $
163 case bracket of
164 (toList -> [unTree -> Cell bl el (NodeToken (TokenLink lnk))]) ->
165 element "eref" $
166 xmlAttrs [Cell bl el ("to",lnk)] <>
167 xmlify inh ts
168 _ ->
169 element "rref" $
170 xmlAttrs [Cell bb eb ("to",Plain.document bracket)] <>
171 xmlify inh ts
172 ----------------------
173 -- NOTE: gather HeaderDash
174 _ | (span, rest) <- spanlItems (==HeaderDash) roots
175 , not $ null span ->
176 (<| xmlify inh rest) $
177 element "ul" $
178 span >>= xmlify inh{inh_para=List.repeat elementPara}
179 ----------------------
180 -- NOTE: gather HeaderDot
181 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
182 , not $ null span ->
183 (<| xmlify inh rest) $
184 element "ol" $
185 span >>= xmlify inh{inh_para=List.repeat elementPara}
186 where
187 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
188 spanlItems liHeader =
189 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
190 case nod of
191 NodeHeader (HeaderColon "li" _wh) -> True
192 NodeHeader hdr -> liHeader hdr
193 NodePair (PairElem "li" _as) -> True
194 _ -> False
195 ----------------------
196 NodePara | para:inh_para <- inh_para inh ->
197 para inh r <|
198 -- para (() <$ cr) (xmlify inh ts) <|
199 xmlify inh{inh_para} rs
200 ----------------------
201 -- NOTE: context-free Root
202 _ ->
203 xmlify inh r <>
204 xmlify inh rs
205 where
206 element :: XmlName -> XMLs -> XML
207 element n = Tree (XmlElem n <$ cr)
208 instance Xmlify Root where
209 xmlify inh tn@(Tree (Cell bn en nod) ts) =
210 case nod of
211 NodeGroup -> xmlify inh ts
212 ----------------------
213 NodePara ->
214 case inh_para inh of
215 [] -> xmlify inh ts
216 para:_ -> Seq.singleton $ para inh tn -- para (() <$ cn) $ xmlify inh ts
217 ----------------------
218 NodeHeader hdr ->
219 case hdr of
220 --
221 HeaderSection{} ->
222 Seq.singleton $
223 element "section" $ head <> xmlify inh' body
224 where
225 (titles, content) = partitionSection tn
226 (attrs, body) = partitionAttrs content
227 head =
228 case Seq.viewl titles of
229 EmptyL -> mempty
230 title@(unTree -> ct) :< subtitles ->
231 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
232 xmlify inh{inh_para=List.repeat elementTitle} title <>
233 aliases
234 where
235 aliases =
236 subtitles >>= \subtitle@(unTree -> cs) ->
237 return $
238 Tree (cs $> XmlElem "alias") $
239 xmlAttrs [cs $> ("id",getAttrId subtitle)]
240 inh' = inh
241 { inh_para = List.repeat elementPara
242 , inh_figure = True
243 }
244 --
245 HeaderColon n _wh ->
246 let (attrs,body) = partitionAttrs ts in
247 case n of
248 -- NOTE: insert titles into <about>.
249 "about" ->
250 Seq.singleton $
251 element "about" $
252 xmlify inh' (inh_titles inh) <>
253 xmlAttrs attrs <>
254 xmlify inh' body
255 -- NOTE: in <figure> mode, unreserved elements become <figure>
256 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
257 Seq.singleton $
258 element "figure" $
259 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
260 xmlAttrs (attrs `defaultAttr` Cell bn bn ("type", n)) <>
261 case toList body of
262 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
263 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
264 -- NOTE: reserved elements
265 _ ->
266 Seq.singleton $
267 element (xmlLocalName n) $
268 xmlAttrs attrs <>
269 xmlify inh' body
270 where
271 inh' = inh
272 { inh_para =
273 case n of
274 "about" -> List.repeat elementTitle
275 "reference" -> elementTitle : List.repeat elementPara
276 "serie" -> List.repeat attributeName
277 "author" -> List.repeat attributeName
278 "editor" -> List.repeat attributeName
279 "org" -> List.repeat attributeName
280 "note" -> List.repeat elementPara
281 _ -> []
282 }
283 --
284 HeaderBar n _wh ->
285 Seq.singleton $
286 element "artwork" $
287 xmlAttrs (Seq.singleton $ Cell bn bn ("type", n)) <>
288 xmlify inh{inh_para=[]} ts
289 --
290 HeaderGreat n _wh ->
291 Seq.singleton $
292 let (attrs,body) = partitionAttrs ts in
293 element "quote" $
294 xmlAttrs (attrs `defaultAttr` Cell bn bn ("type", n)) <>
295 xmlify inh{inh_para=List.repeat elementPara} body
296 --
297 HeaderEqual n _wh ->
298 Seq.singleton $
299 Tree0 $ cell $ XmlAttr (xmlLocalName n) $
300 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
301 --
302 HeaderDot _n -> Seq.singleton $ element "li" $ xmlify inh ts
303 --
304 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
305 --
306 HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $
307 -- debug1_ ("TS", ts) $
308 -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $
309 Plain.document ts
310 -- Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing
311 {-
312 TreeSeq.mapAlsoNode
313 (cell1 . unCell)
314 (\_k -> fmap $
315 TreeSeq.mapAlsoNode
316 (cell1 . unCell)
317 (\_k' -> cell1 . unCell)) <$> ts
318 -}
319 --
320 HeaderBrackets ident ->
321 let (attrs,body) = partitionAttrs ts in
322 Seq.singleton $
323 element "reference" $
324 xmlAttrs (setAttr (Cell en en ("id",ident)) attrs) <>
325 xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body
326 where
327 inh' = inh{inh_figure = False}
328 --
329 HeaderDotSlash p ->
330 Seq.singleton $
331 element "include" $
332 xmlAttrs [cell ("href",TL.pack $ FP.replaceExtension p "dtc")] <>
333 xmlify inh ts
334 ----------------------
335 NodePair pair ->
336 case pair of
337 PairBracket | to <- Plain.document ts
338 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
339 Seq.singleton $
340 element "rref" $
341 xmlAttrs [cell ("to",to)]
342 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
343 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
344 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
345 PairFrenchquote ->
346 Seq.singleton $
347 element "q" $
348 xmlify inh ts
349 {-
350 case ts of
351 (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) ->
352 case Seq.viewr ls of
353 m :> Tree0 (Cell br er (TokenPlain r)) ->
354 xmlify inh $
355 Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)))
356 <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))))
357 _ ->
358 xmlify inh $
359 Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls
360 (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) ->
361 xmlify inh $
362 rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r)))
363 _ -> xmlify inh ts
364 -}
365 PairHash ->
366 Seq.singleton $
367 element "ref" $
368 xmlAttrs [cell ("to",Plain.document ts)]
369 PairElem name attrs ->
370 Seq.singleton $
371 element (xmlLocalName name) $
372 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
373 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
374 xmlify inh ts
375 _ ->
376 let (open, close) = pairBorders pair ts in
377 Seq.singleton (Tree0 $ Cell bn bn $ XmlText open) `unionXml`
378 xmlify inh ts `unionXml`
379 Seq.singleton (Tree0 $ Cell en en $ XmlText close)
380 ----------------------
381 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
382 ----------------------
383 NodeToken tok ->
384 case tok of
385 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
386 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
387 TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)]
388 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
389 ----------------------
390 NodeLower n as ->
391 Seq.singleton $
392 element "artwork" $
393 xmlify inh ts
394 where
395 cell :: a -> Cell a
396 cell = Cell bn en
397 element :: XmlName -> XMLs -> XML
398 element n = Tree (cell $ XmlElem n)
399 instance Xmlify (Seq (Cell (XmlName,TL.Text))) where
400 xmlify _inh = xmlAttrs
401
402 -- * Elements
403
404 -- | Reserved elements' name
405 elems :: Set TL.Text
406 elems =
407 [ "about"
408 , "abstract"
409 , "address"
410 , "alias"
411 , "annotation"
412 , "area"
413 , "artwork"
414 , "aside"
415 , "audio"
416 , "author"
417 , "authors"
418 , "bcp14"
419 , "br"
420 , "call"
421 , "city"
422 , "code"
423 , "comment"
424 , "comments"
425 , "country"
426 , "date"
427 , "dd"
428 , "define"
429 , "del"
430 , "div"
431 , "dl"
432 , "document"
433 , "dt"
434 , "editor"
435 , "email"
436 , "embed"
437 , "eref"
438 , "fax"
439 , "feed"
440 , "feedback"
441 , "figure"
442 , "filter"
443 , "format"
444 , "from"
445 , "h"
446 , "hi"
447 , "html5"
448 , "i"
449 , "index"
450 , "iref"
451 , "keyword"
452 , "li"
453 , "link"
454 , "name"
455 , "note"
456 , "ol"
457 , "organization"
458 , "para"
459 , "postamble"
460 , "preamble"
461 , "q"
462 , "ref"
463 , "reference"
464 , "references"
465 , "region"
466 , "rref"
467 , "sc"
468 , "section"
469 , "serie"
470 , "source"
471 , "span"
472 , "street"
473 , "style"
474 , "sub"
475 , "sup"
476 , "table"
477 , "tbody"
478 , "td"
479 , "tel"
480 , "tfoot"
481 , "title"
482 , "th"
483 , "thead"
484 , "toc"
485 , "tof"
486 , "tr"
487 , "tt"
488 , "u"
489 , "ul"
490 , "uri"
491 , "version"
492 , "video"
493 , "workgroup"
494 , "xml"
495 , "zipcode"
496 ]
497
498 -- * Attributes
499
500 -- | Convenient alias, forcing the types
501 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
502 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
503
504 -- | Extract attributes
505 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
506 partitionAttrs ts = (attrs,cs)
507 where
508 (as,cs) = (`Seq.partition` ts) $ \case
509 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
510 _ -> False
511 attrs = attr <$> as
512 attr = \case
513 Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a ->
514 Cell bp ep (xmlLocalName n, v)
515 where
516 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
517 _ -> undefined
518
519 getAttrId :: Root -> TL.Text
520 getAttrId = Plain.document . Seq.singleton
521
522 setAttr ::
523 Cell (XmlName, TL.Text) ->
524 Seq (Cell (XmlName, TL.Text)) ->
525 Seq (Cell (XmlName, TL.Text))
526 setAttr a@(unCell -> (k, _v)) as =
527 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
528 Just idx -> Seq.update idx a as
529 Nothing -> a <| as
530
531 defaultAttr ::
532 Seq (Cell (XmlName, TL.Text)) ->
533 Cell (XmlName, TL.Text) ->
534 Seq (Cell (XmlName, TL.Text))
535 defaultAttr as a@(unCell -> (k, _v)) =
536 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
537 Just _idx -> as
538 Nothing -> a <| as
539
540 -- * Text
541
542 -- | Unify two 'XMLs', merging border 'XmlText's if any.
543 unionXml :: XMLs -> XMLs -> XMLs
544 unionXml x y =
545 case (Seq.viewr x, Seq.viewl y) of
546 (xs :> x0, y0 :< ys) ->
547 case (x0,y0) of
548 ( Tree0 (Cell bx ex (XmlText tx))
549 , Tree0 (Cell by ey (XmlText ty)) ) ->
550 xs `unionXml`
551 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell bx ex tx <> Cell by ey ty) `unionXml`
552 ys
553 _ -> x <> y
554 (Seq.EmptyR, _) -> y
555 (_, Seq.EmptyL) -> x
556
557 unionsXml :: Foldable f => f XMLs -> XMLs
558 unionsXml = foldl' unionXml mempty