]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Write/XML.hs
Fix HTML5 rendering of external references.
[doclang.git] / Hdoc / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Hdoc.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
22 import Data.Set (Set)
23 import Data.TreeSeq.Strict (Tree(..), tree0)
24 import Data.Tuple (uncurry)
25 import Prelude (Num(..), undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30 import qualified Hdoc.TCT.Write.Plain as Plain
31
32 -- import Hdoc.TCT.Debug
33 import Hdoc.TCT.Utils
34 import Hdoc.TCT as TCT hiding (Parser)
35 import Hdoc.XML (XML, XMLs)
36 import qualified Hdoc.XML as XML
37 import Text.Blaze.XML ()
38
39 -- | Main entry point
40 --
41 -- NOTE: 'XmlNode' are still annotated with 'Cell',
42 -- but nothing is done to preserve any ordering amongst them,
43 -- because 'Node's sometimes need to be reordered
44 -- (eg. about/title may have a title from the section before,
45 -- hence outside of about).
46 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
47 writeXML :: Roots -> XMLs
48 writeXML doc =
49 -- (`S.evalState` def) $
50 case Seq.viewl doc of
51 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
52 let (titles, content) = partitionSection sec in
53 case Seq.viewl titles of
54 (unTree -> Cell st _) :< _ ->
55 xmlify def
56 { inh_titles = titles
57 , inh_figure = True
58 } contentWithAbout <>
59 xmlify def foot
60 where
61 contentWithAbout =
62 case Seq.findIndexL isAbout content of
63 Nothing -> Tree (Cell st $ NodeHeader $ HeaderColon "about" "") mempty <| content
64 Just{} -> content
65 isAbout = \case
66 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
67 _ -> False
68 _ -> xmlify def doc
69 _ -> xmlify def doc
70
71 -- * Type 'Inh'
72 data Inh
73 = Inh
74 { inh_figure :: Bool
75 , inh_para :: [Inh -> Root -> XML]
76 , inh_titles :: Roots
77 }
78 instance Default Inh where
79 def = Inh
80 { inh_figure = False
81 , inh_para = List.repeat elementPara
82 , inh_titles = mempty
83 }
84
85 -- ** 'inh_para'
86 elementPara :: Inh -> Root -> XML
87 elementPara inh (Tree c ts) = Tree (XML.NodeElem "para" <$ c) $ xmlify inh ts
88
89 elementTitle :: Inh -> Root -> XML
90 elementTitle inh (Tree c ts) = Tree (XML.NodeElem "title" <$ c) $ xmlify inh ts
91
92 elementName :: Inh -> Root -> XML
93 elementName inh (Tree c ts) = Tree (XML.NodeElem "name" <$ c) $ xmlify inh ts
94
95 attributeName :: Inh -> Root -> XML
96 attributeName _inh (Tree c ts) = tree0 (XML.NodeAttr "name" (Plain.writePlain ts) <$ c)
97
98 attributeId :: Inh -> Root -> XML
99 attributeId _inh (Tree c ts) = tree0 (XML.NodeAttr "id" (Plain.writePlain ts) <$ c)
100
101 -- * Class 'Xmlify'
102 class Xmlify a where
103 xmlify :: Inh -> a -> XMLs
104 instance Xmlify Roots where
105 xmlify inh roots =
106 case Seq.viewl roots of
107 EmptyL -> mempty
108 r@(Tree cr@(Cell _sr nr) ts) :< rs ->
109 case nr of
110 ----------------------
111 -- NOTE: HeaderColon becomes parent
112 -- of any continuous following-sibling HeaderBar or HeaderGreat
113 NodeHeader (HeaderColon n _wh)
114 | (span, rest) <- spanlHeaderColon rs
115 , not $ null span ->
116 xmlify inh (Tree cr (ts<>span)) <>
117 xmlify inh rest
118 where
119 spanlHeaderColon :: Roots -> (Roots, Roots)
120 spanlHeaderColon =
121 Seq.spanl $ \case
122 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
123 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
124 _ -> False
125 ----------------------
126 -- NOTE: gather HeaderBrackets
127 NodeHeader HeaderBrackets{}
128 | (span,rest) <- spanlBrackets roots
129 , not (null span) ->
130 (<| xmlify inh rest) $
131 element "references" $
132 span >>= xmlify inh
133 where
134 spanlBrackets :: Roots -> (Roots, Roots)
135 spanlBrackets =
136 Seq.spanl $ \case
137 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
138 _ -> False
139 ----------------------
140 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
141 NodeText x
142 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
143 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
144 ----------------------
145 -- NOTE: detect (some text)[http://some.url] or (some text)[SomeRef]
146 NodePair PairParen
147 | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
148 (<| xmlify inh rs') $
149 case bracket of
150 (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) ->
151 element "eref" $
152 xmlAttrs [Cell sl ("to",lnk)] <>
153 xmlify inh ts
154 _ ->
155 element "rref" $
156 xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <>
157 if null ts -- NOTE: preserve empty parens
158 then Seq.singleton $ tree0 (XML.NodeText "" <$ cr)
159 else xmlify inh ts
160 ----------------------
161 -- NOTE: gather HeaderDash
162 _ | (span, rest) <- spanlItems (==HeaderDash) roots
163 , not $ null span ->
164 (<| xmlify inh rest) $
165 element "ul" $
166 span >>= xmlify inh{inh_para=List.repeat elementPara}
167 ----------------------
168 -- NOTE: gather HeaderDot
169 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
170 , not $ null span ->
171 (<| xmlify inh rest) $
172 element "ol" $
173 span >>= xmlify inh{inh_para=List.repeat elementPara}
174 where
175 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
176 spanlItems liHeader =
177 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
178 case nod of
179 NodeHeader (HeaderColon "li" _wh) -> True
180 NodeHeader hdr -> liHeader hdr
181 NodePair (PairElem "li" _as) -> True
182 _ -> False
183 ----------------------
184 NodePara | para:inh_para <- inh_para inh ->
185 para inh r <|
186 xmlify inh{inh_para} rs
187 ----------------------
188 -- NOTE: context-free Root
189 _ ->
190 xmlify inh r `unionXml`
191 xmlify inh rs
192 where
193 element :: XML.Name -> XMLs -> XML
194 element n = Tree (XML.NodeElem n <$ cr)
195 instance Xmlify Root where
196 xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) =
197 case nod of
198 ----------------------
199 NodePara ->
200 case inh_para inh of
201 [] -> xmlify inh ts
202 para:_ -> Seq.singleton $ para inh tn
203 ----------------------
204 NodeHeader hdr ->
205 case hdr of
206 --
207 HeaderSection{} ->
208 Seq.singleton $
209 element "section" $ head <> xmlify inh' body
210 where
211 (titles, content) = partitionSection tn
212 (attrs, body) = partitionAttrs content
213 head =
214 case Seq.viewl titles of
215 EmptyL -> mempty
216 title@(unTree -> ct) :< subtitles ->
217 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
218 xmlify inh{inh_para=List.repeat elementTitle} title <>
219 aliases
220 where
221 aliases =
222 subtitles >>= \subtitle@(unTree -> cs) ->
223 return $
224 Tree (cs $> XML.NodeElem "alias") $
225 xmlify inh{inh_para=List.repeat elementTitle} subtitle
226 inh' = inh
227 { inh_para = List.repeat elementPara
228 , inh_figure = True
229 }
230 --
231 HeaderColon n _wh ->
232 let (attrs,body) = partitionAttrs ts in
233 case n of
234 -- NOTE: insert titles into <about>
235 "about" ->
236 Seq.singleton $
237 element "about" $
238 xmlify inh' (inh_titles inh) <>
239 xmlAttrs attrs <>
240 xmlify inh'{inh_figure=False} body
241 -- NOTE: handle judgment
242 _ | n`List.elem`elemsJudgment -> -- FIXME: not a special case so far.
243 Seq.singleton $
244 element (XML.localName n) $
245 xmlAttrs attrs <>
246 xmlify inh'' body
247 where
248 inh'' = inh'
249 { inh_para =
250 case n of
251 "grades" -> List.repeat attributeId
252 "judges" -> List.repeat attributeId
253 _ -> List.repeat elementTitle
254 }
255 -- NOTE: in <figure> mode, unreserved elements become <figure>
256 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
257 Seq.singleton $
258 element "figure" $
259 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
260 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <>
261 case toList body of
262 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
263 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
264 -- NOTE: reserved elements
265 _ ->
266 Seq.singleton $
267 element (XML.localName n) $
268 xmlAttrs attrs <>
269 xmlify inh' body
270 where
271 inh' = inh
272 { inh_para =
273 case n of
274 "about" -> List.repeat elementTitle
275 "reference" -> elementTitle : List.repeat elementPara
276 "serie" -> List.repeat attributeName
277 "author" -> List.repeat attributeName
278 "editor" -> List.repeat attributeName
279 "org" -> List.repeat attributeName
280 "note" -> List.repeat elementPara
281 _ -> []
282 }
283 --
284 HeaderBar n wh ->
285 if inh_figure inh && n`List.notElem`elems || TL.null n
286 then
287 Seq.singleton $
288 element "artwork" $
289 xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
290 xmlify inh{inh_para=[]} ts
291 else
292 xmlify inh $
293 Tree (cell $ NodeHeader $ HeaderColon n wh) ts
294 --
295 HeaderGreat n _wh ->
296 Seq.singleton $
297 let (attrs,body) = partitionAttrs ts in
298 element "quote" $
299 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
300 xmlify inh{inh_para=List.repeat elementPara} body
301 --
302 HeaderEqual n _wh ->
303 Seq.singleton $
304 Tree0 $ cell $ XML.NodeAttr (XML.localName n) $
305 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
306 --
307 HeaderDot n ->
308 Seq.singleton $
309 element "li" $
310 let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in
311 xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <>
312 xmlify inh ts
313 --
314 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
315 --
316 HeaderDashDash ->
317 Seq.singleton $ Tree0 $ cell $
318 XML.NodeComment $ Plain.writePlain ts
319 --
320 HeaderBrackets ident ->
321 let (attrs,body) = partitionAttrs ts in
322 Seq.singleton $
323 element "reference" $
324 xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) <>
325 xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body
326 where
327 inh' = inh{inh_figure = False}
328 --
329 HeaderDotSlash _file -> xmlify inh ts
330 ----------------------
331 NodePair pair ->
332 case pair of
333 PairBracket | to <- Plain.writePlain ts
334 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
335 Seq.singleton $
336 element "rref" $
337 xmlAttrs [cell ("to",to)]
338 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
339 PairDash -> Seq.singleton $ element "del" $ xmlify inh ts
340 PairUnderscore -> Seq.singleton $ element "u" $ xmlify inh ts
341 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
342 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
343 PairFrenchquote ->
344 Seq.singleton $
345 element "q" $
346 case ts of
347 (Seq.viewl -> Tree0 (Cell sl (NodeToken (TokenText l))) :< ls) ->
348 case Seq.viewr ls of
349 m :> Tree0 (Cell sr (NodeToken (TokenText r))) ->
350 xmlify inh $
351 Tree0 (Cell sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
352 Seq.<|(m Seq.|>Tree0 (Cell sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r)))))
353 _ ->
354 xmlify inh $
355 Tree0 (Cell sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
356 (Seq.viewr -> rs :> Tree0 (Cell sr (NodeToken (TokenText r)))) ->
357 xmlify inh $
358 rs Seq.|> Tree0 (Cell sr (NodeToken (TokenText (TL.dropAround Char.isSpace r))))
359 _ -> xmlify inh ts
360 PairHash ->
361 Seq.singleton $
362 element "tag" $
363 xmlify inh{inh_para=[]} ts
364 -- xmlAttrs [cell ("to",Plain.writePlain ts)]
365 PairElem name attrs ->
366 Seq.singleton $
367 element (XML.localName name) $
368 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
369 cell (XML.localName elemAttr_name,elemAttr_value)) <$> attrs) <>
370 xmlify inh ts
371 _ ->
372 Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XML.NodeText open) `unionXml`
373 xmlify inh ts `unionXml`
374 Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XML.NodeText close)
375 where
376 (open, close) = pairBorders pair ts
377 bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)}
378 en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)}
379 ----------------------
380 NodeText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t
381 ----------------------
382 NodeToken tok ->
383 case tok of
384 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XML.NodeText $ TL.singleton c
385 TokenText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t
386 TokenTag t -> Seq.singleton $ element "tag" $ Seq.singleton $ Tree0 $ cell $ XML.NodeText t
387 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
388 ----------------------
389 NodeLower n as ->
390 Seq.singleton $
391 element "artwork" $
392 xmlify inh ts
393 where
394 cell :: a -> Cell a
395 cell = Cell ss
396 element :: XML.Name -> XMLs -> XML
397 element n = Tree (cell $ XML.NodeElem n)
398 instance Xmlify (Seq (Cell (XML.Name,TL.Text))) where
399 xmlify _inh = xmlAttrs
400
401 -- * Elements
402
403 -- | Reserved elements' name
404 elems :: Set TL.Text
405 elems =
406 [ "about"
407 , "abstract"
408 , "address"
409 , "alias"
410 , "annotation"
411 , "area"
412 , "artwork"
413 , "aside"
414 , "audio"
415 , "author"
416 , "authors"
417 , "bcp14"
418 , "br"
419 , "break"
420 , "call"
421 , "city"
422 , "code"
423 , "comment"
424 , "comments"
425 , "country"
426 , "date"
427 , "dd"
428 , "default"
429 , "define"
430 , "del"
431 , "div"
432 , "dl"
433 , "document"
434 , "dt"
435 , "editor"
436 , "email"
437 , "embed"
438 , "eref"
439 , "fax"
440 , "feed"
441 , "feedback"
442 , "figure"
443 , "filter"
444 , "format"
445 , "from"
446 , "h"
447 , "hi"
448 , "html5"
449 , "i"
450 , "index"
451 , "iref"
452 , "keyword"
453 , "li"
454 , "link"
455 , "name"
456 , "note"
457 , "ol"
458 , "organization"
459 , "para"
460 , "postamble"
461 , "preamble"
462 , "q"
463 , "reference"
464 , "references"
465 , "region"
466 , "rref"
467 , "sc"
468 , "section"
469 , "serie"
470 , "source"
471 , "span"
472 , "street"
473 , "style"
474 , "sub"
475 , "sup"
476 , "table"
477 , "tag"
478 , "tbody"
479 , "td"
480 , "tel"
481 , "tfoot"
482 , "th"
483 , "thead"
484 , "title"
485 , "toc"
486 , "tof"
487 , "tr"
488 , "tt"
489 , "u"
490 , "ul"
491 , "uri"
492 , "version"
493 , "video"
494 , "workgroup"
495 , "xml"
496 , "zipcode"
497 ]
498
499 elemsJudgment :: Set TL.Text
500 elemsJudgment =
501 [ "choice"
502 , "grade"
503 , "grades"
504 , "judge"
505 , "judges"
506 , "judgment"
507 , "opinion"
508 ]
509
510 -- * Attributes
511
512 -- | Convenient alias, forcing the types
513 xmlAttrs :: Seq (Cell (XML.Name,TL.Text)) -> XMLs
514 xmlAttrs = (Tree0 . (uncurry XML.NodeAttr <$>) <$>)
515
516 -- | Extract section titles
517 partitionSection :: Root -> (Roots, Roots)
518 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
519 case Seq.viewl body of
520 EmptyL -> mempty
521 title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest ->
522 let (subtitles, content) = spanlSubtitles et rest in
523 (title <| (subtitles >>= subTrees), content)
524 where
525 spanlSubtitles ep ts =
526 case Seq.viewl ts of
527 sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
528 | lvlSub <= lvlPar
529 , pos_line span_begin - pos_line ep <= 1 ->
530 let (subs, ts') = spanlSubtitles span_end rs in
531 (sub <| subs, ts')
532 _ -> (mempty, ts)
533 _ -> (mempty, body)
534 partitionSection _ = mempty
535
536 -- | Extract attributes
537 partitionAttrs :: Roots -> (Seq (Cell (XML.Name, TL.Text)), Roots)
538 partitionAttrs ts = (attrs,cs)
539 where
540 (as,cs) = (`Seq.partition` ts) $ \case
541 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
542 _ -> False
543 attrs = attr <$> as
544 attr = \case
545 Tree (Cell loc (NodeHeader (HeaderEqual n _wh))) a ->
546 Cell loc (XML.localName n, v)
547 where
548 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
549 _ -> undefined
550
551 getAttrId :: Root -> TL.Text
552 getAttrId = Plain.writePlain . Seq.singleton
553
554 setAttr ::
555 Cell (XML.Name, TL.Text) ->
556 Seq (Cell (XML.Name, TL.Text)) ->
557 Seq (Cell (XML.Name, TL.Text))
558 setAttr a@(unCell -> (k, _v)) as =
559 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
560 Just idx -> Seq.update idx a as
561 Nothing -> a <| as
562
563 defaultAttr ::
564 Seq (Cell (XML.Name, TL.Text)) ->
565 Cell (XML.Name, TL.Text) ->
566 Seq (Cell (XML.Name, TL.Text))
567 defaultAttr as a@(unCell -> (k, _v)) =
568 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
569 Just _idx -> as
570 Nothing -> a <| as
571
572 -- * Text
573
574 -- | Unify two 'XMLs', merging border 'XML.NodeText's if any.
575 unionXml :: XMLs -> XMLs -> XMLs
576 unionXml x y =
577 case (Seq.viewr x, Seq.viewl y) of
578 (xs :> x0, y0 :< ys) ->
579 case (x0,y0) of
580 ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XML.NodeText tx))
581 , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XML.NodeText ty)) ) | fx == fy ->
582 xs `unionXml`
583 Seq.singleton (Tree0 $ (XML.NodeText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml`
584 ys
585 _ -> x <> y
586 (Seq.EmptyR, _) -> y
587 (_, Seq.EmptyL) -> x
588
589 unionsXml :: Foldable f => f XMLs -> XMLs
590 unionsXml = foldl' unionXml mempty