]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Write/XML.hs
Add error support in HTML5.
[doclang.git] / Hdoc / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Hdoc.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
22 import Data.Set (Set)
23 import Data.TreeSeq.Strict (Tree(..), tree0)
24 import Data.Tuple (uncurry)
25 import Prelude (Num(..), undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30 import qualified Hdoc.TCT.Write.Plain as Plain
31
32 -- import Hdoc.TCT.Debug
33 import Hdoc.TCT.Utils
34 import Hdoc.TCT hiding (Parser)
35 import Hdoc.XML
36 import Text.Blaze.XML ()
37
38 -- | Main entry point
39 --
40 -- NOTE: 'XmlNode' are still annotated with 'Cell',
41 -- but nothing is done to preserve any ordering amongst them,
42 -- because 'Node's sometimes need to be reordered
43 -- (eg. about/title may have a title from the section before,
44 -- hence outside of about).
45 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
46 writeXML :: Roots -> XMLs
47 writeXML doc =
48 -- (`S.evalState` def) $
49 case Seq.viewl doc of
50 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
51 let (titles, content) = partitionSection sec in
52 case Seq.viewl titles of
53 (unTree -> Cell st _) :< _ ->
54 xmlify def
55 { inh_titles = titles
56 , inh_figure = True
57 } contentWithAbout <>
58 xmlify def foot
59 where
60 contentWithAbout =
61 case Seq.findIndexL isAbout content of
62 Nothing -> Tree (Cell st $ NodeHeader $ HeaderColon "about" "") mempty <| content
63 Just{} -> content
64 isAbout = \case
65 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
66 _ -> False
67 _ -> xmlify def doc
68 _ -> xmlify def doc
69
70 -- * Type 'Inh'
71 data Inh
72 = Inh
73 { inh_figure :: Bool
74 , inh_para :: [Inh -> Root -> XML]
75 , inh_titles :: Roots
76 }
77 instance Default Inh where
78 def = Inh
79 { inh_figure = False
80 , inh_para = List.repeat elementPara
81 , inh_titles = mempty
82 }
83
84 -- ** 'inh_para'
85 elementPara :: Inh -> Root -> XML
86 elementPara inh (Tree c ts) = Tree (XmlElem "para" <$ c) $ xmlify inh ts
87
88 elementTitle :: Inh -> Root -> XML
89 elementTitle inh (Tree c ts) = Tree (XmlElem "title" <$ c) $ xmlify inh ts
90
91 elementName :: Inh -> Root -> XML
92 elementName inh (Tree c ts) = Tree (XmlElem "name" <$ c) $ xmlify inh ts
93
94 attributeName :: Inh -> Root -> XML
95 attributeName _inh (Tree c ts) = tree0 (XmlAttr "name" (Plain.writePlain ts) <$ c)
96
97 attributeId :: Inh -> Root -> XML
98 attributeId _inh (Tree c ts) = tree0 (XmlAttr "id" (Plain.writePlain ts) <$ c)
99
100 -- * Class 'Xmlify'
101 class Xmlify a where
102 xmlify :: Inh -> a -> XMLs
103 instance Xmlify Roots where
104 xmlify inh roots =
105 case Seq.viewl roots of
106 EmptyL -> mempty
107 r@(Tree cr@(Cell _sr nr) ts) :< rs ->
108 case nr of
109 ----------------------
110 -- NOTE: HeaderColon becomes parent
111 -- of any continuous following-sibling HeaderBar or HeaderGreat
112 NodeHeader (HeaderColon n _wh)
113 | (span, rest) <- spanlHeaderColon rs
114 , not $ null span ->
115 xmlify inh (Tree cr (ts<>span)) <>
116 xmlify inh rest
117 where
118 spanlHeaderColon :: Roots -> (Roots, Roots)
119 spanlHeaderColon =
120 Seq.spanl $ \case
121 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
122 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
123 _ -> False
124 ----------------------
125 -- NOTE: gather HeaderBrackets
126 NodeHeader HeaderBrackets{}
127 | (span,rest) <- spanlBrackets roots
128 , not (null span) ->
129 (<| xmlify inh rest) $
130 element "references" $
131 span >>= xmlify inh
132 where
133 spanlBrackets :: Roots -> (Roots, Roots)
134 spanlBrackets =
135 Seq.spanl $ \case
136 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
137 _ -> False
138 ----------------------
139 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
140 NodeText x
141 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
142 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
143 ----------------------
144 -- NOTE: detect (some text)[http://some.url] or (some text)[SomeRef]
145 NodePair PairParen
146 | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
147 (<| xmlify inh rs') $
148 case bracket of
149 (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) ->
150 element "eref" $
151 xmlAttrs [Cell sl ("to",lnk)] <>
152 xmlify inh ts
153 _ ->
154 element "rref" $
155 xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <>
156 if null ts -- NOTE: preserve empty parens
157 then Seq.singleton $ tree0 (XmlText "" <$ cr)
158 else xmlify inh ts
159 ----------------------
160 -- NOTE: gather HeaderDash
161 _ | (span, rest) <- spanlItems (==HeaderDash) roots
162 , not $ null span ->
163 (<| xmlify inh rest) $
164 element "ul" $
165 span >>= xmlify inh{inh_para=List.repeat elementPara}
166 ----------------------
167 -- NOTE: gather HeaderDot
168 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
169 , not $ null span ->
170 (<| xmlify inh rest) $
171 element "ol" $
172 span >>= xmlify inh{inh_para=List.repeat elementPara}
173 where
174 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
175 spanlItems liHeader =
176 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
177 case nod of
178 NodeHeader (HeaderColon "li" _wh) -> True
179 NodeHeader hdr -> liHeader hdr
180 NodePair (PairElem "li" _as) -> True
181 _ -> False
182 ----------------------
183 NodePara | para:inh_para <- inh_para inh ->
184 para inh r <|
185 xmlify inh{inh_para} rs
186 ----------------------
187 -- NOTE: context-free Root
188 _ ->
189 xmlify inh r `unionXml`
190 xmlify inh rs
191 where
192 element :: XmlName -> XMLs -> XML
193 element n = Tree (XmlElem n <$ cr)
194 instance Xmlify Root where
195 xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) =
196 case nod of
197 ----------------------
198 NodePara ->
199 case inh_para inh of
200 [] -> xmlify inh ts
201 para:_ -> Seq.singleton $ para inh tn
202 ----------------------
203 NodeHeader hdr ->
204 case hdr of
205 --
206 HeaderSection{} ->
207 Seq.singleton $
208 element "section" $ head <> xmlify inh' body
209 where
210 (titles, content) = partitionSection tn
211 (attrs, body) = partitionAttrs content
212 head =
213 case Seq.viewl titles of
214 EmptyL -> mempty
215 title@(unTree -> ct) :< subtitles ->
216 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
217 xmlify inh{inh_para=List.repeat elementTitle} title <>
218 aliases
219 where
220 aliases =
221 subtitles >>= \subtitle@(unTree -> cs) ->
222 return $
223 Tree (cs $> XmlElem "alias") $
224 xmlify inh{inh_para=List.repeat elementTitle} subtitle
225 inh' = inh
226 { inh_para = List.repeat elementPara
227 , inh_figure = True
228 }
229 --
230 HeaderColon n _wh ->
231 let (attrs,body) = partitionAttrs ts in
232 case n of
233 -- NOTE: insert titles into <about>
234 "about" ->
235 Seq.singleton $
236 element "about" $
237 xmlify inh' (inh_titles inh) <>
238 xmlAttrs attrs <>
239 xmlify inh'{inh_figure=False} body
240 -- NOTE: handle judgment
241 _ | n`List.elem`elemsJudgment -> -- FIXME: not a special case so far.
242 Seq.singleton $
243 element (xmlLocalName n) $
244 xmlAttrs attrs <>
245 xmlify inh'' body
246 where
247 inh'' = inh'
248 { inh_para =
249 case n of
250 "grades" -> List.repeat attributeId
251 "judges" -> List.repeat attributeId
252 _ -> List.repeat elementTitle
253 }
254 -- NOTE: in <figure> mode, unreserved elements become <figure>
255 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
256 Seq.singleton $
257 element "figure" $
258 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
259 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <>
260 case toList body of
261 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
262 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
263 -- NOTE: reserved elements
264 _ ->
265 Seq.singleton $
266 element (xmlLocalName n) $
267 xmlAttrs attrs <>
268 xmlify inh' body
269 where
270 inh' = inh
271 { inh_para =
272 case n of
273 "about" -> List.repeat elementTitle
274 "reference" -> elementTitle : List.repeat elementPara
275 "serie" -> List.repeat attributeName
276 "author" -> List.repeat attributeName
277 "editor" -> List.repeat attributeName
278 "org" -> List.repeat attributeName
279 "note" -> List.repeat elementPara
280 _ -> []
281 }
282 --
283 HeaderBar n wh ->
284 if inh_figure inh && n`List.notElem`elems || TL.null n
285 then
286 Seq.singleton $
287 element "artwork" $
288 xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
289 xmlify inh{inh_para=[]} ts
290 else
291 xmlify inh $
292 Tree (cell $ NodeHeader $ HeaderColon n wh) ts
293 --
294 HeaderGreat n _wh ->
295 Seq.singleton $
296 let (attrs,body) = partitionAttrs ts in
297 element "quote" $
298 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
299 xmlify inh{inh_para=List.repeat elementPara} body
300 --
301 HeaderEqual n _wh ->
302 Seq.singleton $
303 Tree0 $ cell $ XmlAttr (xmlLocalName n) $
304 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
305 --
306 HeaderDot n ->
307 Seq.singleton $
308 element "li" $
309 let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in
310 xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <>
311 xmlify inh ts
312 --
313 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
314 --
315 HeaderDashDash ->
316 Seq.singleton $ Tree0 $ cell $
317 XmlComment $ Plain.writePlain ts
318 --
319 HeaderBrackets ident ->
320 let (attrs,body) = partitionAttrs ts in
321 Seq.singleton $
322 element "reference" $
323 xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) <>
324 xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body
325 where
326 inh' = inh{inh_figure = False}
327 --
328 HeaderDotSlash _file -> xmlify inh ts
329 ----------------------
330 NodePair pair ->
331 case pair of
332 PairBracket | to <- Plain.writePlain ts
333 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
334 Seq.singleton $
335 element "rref" $
336 xmlAttrs [cell ("to",to)]
337 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
338 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
339 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
340 PairFrenchquote ->
341 Seq.singleton $
342 element "q" $
343 case ts of
344 (Seq.viewl -> Tree0 (Cell sl (NodeToken (TokenText l))) :< ls) ->
345 case Seq.viewr ls of
346 m :> Tree0 (Cell sr (NodeToken (TokenText r))) ->
347 xmlify inh $
348 Tree0 (Cell sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
349 Seq.<|(m Seq.|>Tree0 (Cell sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r)))))
350 _ ->
351 xmlify inh $
352 Tree0 (Cell sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
353 (Seq.viewr -> rs :> Tree0 (Cell sr (NodeToken (TokenText r)))) ->
354 xmlify inh $
355 rs Seq.|> Tree0 (Cell sr (NodeToken (TokenText (TL.dropAround Char.isSpace r))))
356 _ -> xmlify inh ts
357 PairHash ->
358 Seq.singleton $
359 element "tag" $
360 xmlify inh{inh_para=[]} ts
361 -- xmlAttrs [cell ("to",Plain.writePlain ts)]
362 PairElem name attrs ->
363 Seq.singleton $
364 element (xmlLocalName name) $
365 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
366 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
367 xmlify inh ts
368 _ ->
369 Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XmlText open) `unionXml`
370 xmlify inh ts `unionXml`
371 Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XmlText close)
372 where
373 (open, close) = pairBorders pair ts
374 bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)}
375 en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)}
376 ----------------------
377 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
378 ----------------------
379 NodeToken tok ->
380 case tok of
381 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
382 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
383 TokenTag t -> Seq.singleton $ element "tag" $ Seq.singleton $ Tree0 $ cell $ XmlText t
384 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
385 ----------------------
386 NodeLower n as ->
387 Seq.singleton $
388 element "artwork" $
389 xmlify inh ts
390 where
391 cell :: a -> Cell a
392 cell = Cell ss
393 element :: XmlName -> XMLs -> XML
394 element n = Tree (cell $ XmlElem n)
395 instance Xmlify (Seq (Cell (XmlName,TL.Text))) where
396 xmlify _inh = xmlAttrs
397
398 -- * Elements
399
400 -- | Reserved elements' name
401 elems :: Set TL.Text
402 elems =
403 [ "about"
404 , "abstract"
405 , "address"
406 , "alias"
407 , "annotation"
408 , "area"
409 , "artwork"
410 , "aside"
411 , "audio"
412 , "author"
413 , "authors"
414 , "bcp14"
415 , "br"
416 , "break"
417 , "call"
418 , "city"
419 , "code"
420 , "comment"
421 , "comments"
422 , "country"
423 , "date"
424 , "dd"
425 , "default"
426 , "define"
427 , "del"
428 , "div"
429 , "dl"
430 , "document"
431 , "dt"
432 , "editor"
433 , "email"
434 , "embed"
435 , "eref"
436 , "fax"
437 , "feed"
438 , "feedback"
439 , "figure"
440 , "filter"
441 , "format"
442 , "from"
443 , "h"
444 , "hi"
445 , "html5"
446 , "i"
447 , "index"
448 , "iref"
449 , "keyword"
450 , "li"
451 , "link"
452 , "name"
453 , "note"
454 , "ol"
455 , "organization"
456 , "para"
457 , "postamble"
458 , "preamble"
459 , "q"
460 , "reference"
461 , "references"
462 , "region"
463 , "rref"
464 , "sc"
465 , "section"
466 , "serie"
467 , "source"
468 , "span"
469 , "street"
470 , "style"
471 , "sub"
472 , "sup"
473 , "table"
474 , "tag"
475 , "tbody"
476 , "td"
477 , "tel"
478 , "tfoot"
479 , "th"
480 , "thead"
481 , "title"
482 , "toc"
483 , "tof"
484 , "tr"
485 , "tt"
486 , "u"
487 , "ul"
488 , "uri"
489 , "version"
490 , "video"
491 , "workgroup"
492 , "xml"
493 , "zipcode"
494 ]
495
496 elemsJudgment :: Set TL.Text
497 elemsJudgment =
498 [ "choice"
499 , "grade"
500 , "grades"
501 , "judge"
502 , "judges"
503 , "judgment"
504 , "opinion"
505 ]
506
507 -- * Attributes
508
509 -- | Convenient alias, forcing the types
510 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
511 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
512
513 -- | Extract section titles
514 partitionSection :: Root -> (Roots, Roots)
515 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
516 case Seq.viewl body of
517 EmptyL -> mempty
518 title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest ->
519 let (subtitles, content) = spanlSubtitles et rest in
520 (title <| (subtitles >>= subTrees), content)
521 where
522 spanlSubtitles ep ts =
523 case Seq.viewl ts of
524 sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
525 | lvlSub <= lvlPar
526 , pos_line span_begin - pos_line ep <= 1 ->
527 let (subs, ts') = spanlSubtitles span_end rs in
528 (sub <| subs, ts')
529 _ -> (mempty, ts)
530 _ -> (mempty, body)
531 partitionSection _ = mempty
532
533 -- | Extract attributes
534 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
535 partitionAttrs ts = (attrs,cs)
536 where
537 (as,cs) = (`Seq.partition` ts) $ \case
538 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
539 _ -> False
540 attrs = attr <$> as
541 attr = \case
542 Tree (Cell ssn (NodeHeader (HeaderEqual n _wh))) a ->
543 Cell ssn (xmlLocalName n, v)
544 where
545 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
546 _ -> undefined
547
548 getAttrId :: Root -> TL.Text
549 getAttrId = Plain.writePlain . Seq.singleton
550
551 setAttr ::
552 Cell (XmlName, TL.Text) ->
553 Seq (Cell (XmlName, TL.Text)) ->
554 Seq (Cell (XmlName, TL.Text))
555 setAttr a@(unCell -> (k, _v)) as =
556 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
557 Just idx -> Seq.update idx a as
558 Nothing -> a <| as
559
560 defaultAttr ::
561 Seq (Cell (XmlName, TL.Text)) ->
562 Cell (XmlName, TL.Text) ->
563 Seq (Cell (XmlName, TL.Text))
564 defaultAttr as a@(unCell -> (k, _v)) =
565 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
566 Just _idx -> as
567 Nothing -> a <| as
568
569 -- * Text
570
571 -- | Unify two 'XMLs', merging border 'XmlText's if any.
572 unionXml :: XMLs -> XMLs -> XMLs
573 unionXml x y =
574 case (Seq.viewr x, Seq.viewl y) of
575 (xs :> x0, y0 :< ys) ->
576 case (x0,y0) of
577 ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XmlText tx))
578 , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XmlText ty)) ) | fx == fy ->
579 xs `unionXml`
580 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml`
581 ys
582 _ -> x <> y
583 (Seq.EmptyR, _) -> y
584 (_, Seq.EmptyL) -> x
585
586 unionsXml :: Foldable f => f XMLs -> XMLs
587 unionsXml = foldl' unionXml mempty