]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Modify HTML5 ParaOL and ParaUL rendering.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
21 import Data.Set (Set)
22 import Data.TreeSeq.Strict (Tree(..), tree0)
23 import Data.Tuple (uncurry)
24 import Prelude (Num(..), undefined)
25 import qualified Data.Char as Char
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text.Lazy as TL
29 import qualified Language.TCT.Write.Plain as Plain
30 import qualified System.FilePath as FP
31
32 -- import Language.TCT.Debug
33 import Language.TCT.Utils
34 import Language.TCT hiding (Parser)
35 import Language.XML
36 import Text.Blaze.XML ()
37
38 -- | Main entry point
39 --
40 -- NOTE: 'XmlNode' are still annotated with 'Cell',
41 -- but nothing is done to preserve any ordering amongst them,
42 -- because 'Node's sometimes need to be reordered
43 -- (eg. about/title may have a title from the section before,
44 -- hence outside of about).
45 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
46 document :: Roots -> XMLs
47 document doc =
48 -- (`S.evalState` def) $
49 case Seq.viewl doc of
50 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
51 let (titles, content) = partitionSection sec in
52 case Seq.viewl titles of
53 (unTree -> Cell bt et _) :< _ ->
54 xmlify def
55 { inh_titles = titles
56 , inh_figure = True
57 } contentWithAbout <>
58 xmlify def foot
59 where
60 contentWithAbout =
61 case Seq.findIndexL isAbout content of
62 Nothing -> Tree (Cell bt et $ NodeHeader $ HeaderColon "about" "") mempty <| content
63 Just{} -> content
64 isAbout = \case
65 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
66 _ -> False
67 _ -> xmlify def doc
68 _ -> xmlify def doc
69
70 partitionSection :: Root -> (Roots, Roots)
71 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
72 case Seq.viewl body of
73 EmptyL -> mempty
74 title@(unTree -> Cell _bt et NodePara) :< rest ->
75 let (subtitles, content) = spanlSubtitles et rest in
76 (title <| (subtitles >>= subTrees), content)
77 where
78 spanlSubtitles ep ts =
79 case Seq.viewl ts of
80 sub@(unTree -> Cell bs es (NodeHeader (HeaderSection lvlSub))) :< rs
81 | lvlSub <= lvlPar
82 , pos_line bs - pos_line ep <= 1 ->
83 let (subs, ts') = spanlSubtitles es rs in
84 (sub <| subs, ts')
85 _ -> (mempty, ts)
86 _ -> (mempty, body)
87 partitionSection _ = mempty
88
89 -- * Type 'Inh'
90 data Inh
91 = Inh
92 { inh_figure :: Bool
93 , inh_para :: [Inh -> Root -> XML]
94 , inh_titles :: Roots
95 }
96 instance Default Inh where
97 def = Inh
98 { inh_figure = False
99 , inh_para = List.repeat elementPara
100 , inh_titles = mempty
101 }
102
103 -- ** 'inh_para'
104 elementPara :: Inh -> Root -> XML
105 elementPara inh (Tree c ts) = Tree (XmlElem "para" <$ c) $ xmlify inh ts
106
107 elementTitle :: Inh -> Root -> XML
108 elementTitle inh (Tree c ts) = Tree (XmlElem "title" <$ c) $ xmlify inh ts
109
110 elementName :: Inh -> Root -> XML
111 elementName inh (Tree c ts) = Tree (XmlElem "name" <$ c) $ xmlify inh ts
112
113 attributeName :: Inh -> Root -> XML
114 attributeName _inh (Tree c ts) = tree0 (XmlAttr "name" (Plain.document ts) <$ c)
115
116 -- * Class 'Xmlify'
117 class Xmlify a where
118 xmlify :: Inh -> a -> XMLs
119 instance Xmlify Roots where
120 xmlify inh roots =
121 case Seq.viewl roots of
122 EmptyL -> mempty
123 r@(Tree cr@(Cell _br _er nr) ts) :< rs ->
124 case nr of
125 ----------------------
126 -- NOTE: HeaderColon becomes parent
127 -- of any continuous following-sibling HeaderBar or HeaderGreat
128 NodeHeader (HeaderColon n _wh)
129 | (span, rest) <- spanlHeaderColon rs
130 , not $ null span ->
131 xmlify inh (Tree cr (ts<>span)) <>
132 xmlify inh rest
133 where
134 spanlHeaderColon :: Roots -> (Roots, Roots)
135 spanlHeaderColon =
136 Seq.spanl $ \case
137 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
138 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
139 _ -> False
140 ----------------------
141 -- NOTE: gather HeaderBrackets
142 NodeHeader HeaderBrackets{}
143 | (span,rest) <- spanlBrackets roots
144 , not (null span) ->
145 (<| xmlify inh rest) $
146 element "references" $
147 span >>= xmlify inh
148 where
149 spanlBrackets :: Roots -> (Roots, Roots)
150 spanlBrackets =
151 Seq.spanl $ \case
152 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
153 _ -> False
154 ----------------------
155 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
156 NodeText x
157 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
158 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
159 ----------------------
160 -- NOTE: detect [some text](http://some.url) or [SomeRef]
161 NodePair PairParen
162 | Tree (Cell bb eb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
163 (<| xmlify inh rs') $
164 case bracket of
165 (toList -> [unTree -> Cell bl el (NodeToken (TokenLink lnk))]) ->
166 element "eref" $
167 xmlAttrs [Cell bl el ("to",lnk)] <>
168 xmlify inh ts
169 _ ->
170 element "rref" $
171 xmlAttrs [Cell bb eb ("to",Plain.document bracket)] <>
172 xmlify inh ts
173 ----------------------
174 -- NOTE: gather HeaderDash
175 _ | (span, rest) <- spanlItems (==HeaderDash) roots
176 , not $ null span ->
177 (<| xmlify inh rest) $
178 element "ul" $
179 span >>= xmlify inh{inh_para=List.repeat elementPara}
180 ----------------------
181 -- NOTE: gather HeaderDot
182 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
183 , not $ null span ->
184 (<| xmlify inh rest) $
185 element "ol" $
186 span >>= xmlify inh{inh_para=List.repeat elementPara}
187 where
188 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
189 spanlItems liHeader =
190 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
191 case nod of
192 NodeHeader (HeaderColon "li" _wh) -> True
193 NodeHeader hdr -> liHeader hdr
194 NodePair (PairElem "li" _as) -> True
195 _ -> False
196 ----------------------
197 NodePara | para:inh_para <- inh_para inh ->
198 para inh r <|
199 -- para (() <$ cr) (xmlify inh ts) <|
200 xmlify inh{inh_para} rs
201 ----------------------
202 -- NOTE: context-free Root
203 _ ->
204 xmlify inh r <>
205 xmlify inh rs
206 where
207 element :: XmlName -> XMLs -> XML
208 element n = Tree (XmlElem n <$ cr)
209 instance Xmlify Root where
210 xmlify inh tn@(Tree (Cell bn en nod) ts) =
211 case nod of
212 NodeGroup -> xmlify inh ts
213 ----------------------
214 NodePara ->
215 case inh_para inh of
216 [] -> xmlify inh ts
217 para:_ -> Seq.singleton $ para inh tn -- para (() <$ cn) $ xmlify inh ts
218 ----------------------
219 NodeHeader hdr ->
220 case hdr of
221 --
222 HeaderSection{} ->
223 Seq.singleton $
224 element "section" $ head <> xmlify inh' body
225 where
226 (titles, content) = partitionSection tn
227 (attrs, body) = partitionAttrs content
228 head =
229 case Seq.viewl titles of
230 EmptyL -> mempty
231 title@(unTree -> ct) :< subtitles ->
232 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
233 xmlify inh{inh_para=List.repeat elementTitle} title <>
234 aliases
235 where
236 aliases =
237 subtitles >>= \subtitle@(unTree -> cs) ->
238 return $
239 Tree (cs $> XmlElem "alias") $
240 xmlAttrs [cs $> ("id",getAttrId subtitle)]
241 inh' = inh
242 { inh_para = List.repeat elementPara
243 , inh_figure = True
244 }
245 --
246 HeaderColon n _wh ->
247 let (attrs,body) = partitionAttrs ts in
248 case n of
249 -- NOTE: insert titles into <about>.
250 "about" ->
251 Seq.singleton $
252 element "about" $
253 xmlify inh' (inh_titles inh) <>
254 xmlAttrs attrs <>
255 xmlify inh' body
256 -- NOTE: in <figure> mode, unreserved elements become <figure>
257 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
258 Seq.singleton $
259 element "figure" $
260 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
261 xmlAttrs (attrs `defaultAttr` Cell bn bn ("type", n)) <>
262 case toList body of
263 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
264 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
265 -- NOTE: reserved elements
266 _ ->
267 Seq.singleton $
268 element (xmlLocalName n) $
269 xmlAttrs attrs <>
270 xmlify inh' body
271 where
272 inh' = inh
273 { inh_para =
274 case n of
275 "about" -> List.repeat elementTitle
276 "reference" -> elementTitle : List.repeat elementPara
277 "serie" -> List.repeat attributeName
278 "author" -> List.repeat attributeName
279 "editor" -> List.repeat attributeName
280 "org" -> List.repeat attributeName
281 "note" -> List.repeat elementPara
282 _ -> []
283 }
284 --
285 HeaderBar n _wh ->
286 Seq.singleton $
287 element "artwork" $
288 xmlAttrs (Seq.singleton $ Cell bn bn ("type", n)) <>
289 xmlify inh{inh_para=[]} ts
290 --
291 HeaderGreat n _wh ->
292 Seq.singleton $
293 let (attrs,body) = partitionAttrs ts in
294 element "quote" $
295 xmlAttrs (attrs `defaultAttr` Cell bn bn ("type", n)) <>
296 xmlify inh{inh_para=List.repeat elementPara} body
297 --
298 HeaderEqual n _wh ->
299 Seq.singleton $
300 Tree0 $ cell $ XmlAttr (xmlLocalName n) $
301 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
302 --
303 HeaderDot n ->
304 Seq.singleton $
305 element "li" $
306 xmlAttrs (Seq.singleton $ Cell bn bn{pos_column=pos_column bn + int (TL.length n)} ("name", n)) <>
307 xmlify inh ts
308 --
309 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
310 --
311 HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $
312 -- debug1_ ("TS", ts) $
313 -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $
314 Plain.document ts
315 -- Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing
316 {-
317 TreeSeq.mapAlsoNode
318 (cell1 . unCell)
319 (\_k -> fmap $
320 TreeSeq.mapAlsoNode
321 (cell1 . unCell)
322 (\_k' -> cell1 . unCell)) <$> ts
323 -}
324 --
325 HeaderBrackets ident ->
326 let (attrs,body) = partitionAttrs ts in
327 Seq.singleton $
328 element "reference" $
329 xmlAttrs (setAttr (Cell en en ("id",ident)) attrs) <>
330 xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body
331 where
332 inh' = inh{inh_figure = False}
333 --
334 HeaderDotSlash p ->
335 Seq.singleton $
336 element "include" $
337 xmlAttrs [cell ("href",TL.pack $ FP.replaceExtension p "dtc")] <>
338 xmlify inh ts
339 ----------------------
340 NodePair pair ->
341 case pair of
342 PairBracket | to <- Plain.document ts
343 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
344 Seq.singleton $
345 element "rref" $
346 xmlAttrs [cell ("to",to)]
347 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
348 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
349 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
350 PairFrenchquote ->
351 Seq.singleton $
352 element "q" $
353 xmlify inh ts
354 {-
355 case ts of
356 (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) ->
357 case Seq.viewr ls of
358 m :> Tree0 (Cell br er (TokenPlain r)) ->
359 xmlify inh $
360 Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)))
361 <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))))
362 _ ->
363 xmlify inh $
364 Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls
365 (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) ->
366 xmlify inh $
367 rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r)))
368 _ -> xmlify inh ts
369 -}
370 PairHash ->
371 Seq.singleton $
372 element "ref" $
373 xmlAttrs [cell ("to",Plain.document ts)]
374 PairElem name attrs ->
375 Seq.singleton $
376 element (xmlLocalName name) $
377 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
378 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
379 xmlify inh ts
380 _ ->
381 Seq.singleton (Tree0 $ Cell bn bn' $ XmlText open) `unionXml`
382 xmlify inh ts `unionXml`
383 Seq.singleton (Tree0 $ Cell en' en $ XmlText close)
384 where
385 (open, close) = pairBorders pair ts
386 bn' = bn{pos_column=pos_column bn + int (TL.length open)}
387 en' = en{pos_column=pos_column bn - int (TL.length close)}
388 ----------------------
389 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
390 ----------------------
391 NodeToken tok ->
392 case tok of
393 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
394 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
395 TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)]
396 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
397 ----------------------
398 NodeLower n as ->
399 Seq.singleton $
400 element "artwork" $
401 xmlify inh ts
402 where
403 cell :: a -> Cell a
404 cell = Cell bn en
405 element :: XmlName -> XMLs -> XML
406 element n = Tree (cell $ XmlElem n)
407 instance Xmlify (Seq (Cell (XmlName,TL.Text))) where
408 xmlify _inh = xmlAttrs
409
410 -- * Elements
411
412 -- | Reserved elements' name
413 elems :: Set TL.Text
414 elems =
415 [ "about"
416 , "abstract"
417 , "address"
418 , "alias"
419 , "annotation"
420 , "area"
421 , "artwork"
422 , "aside"
423 , "audio"
424 , "author"
425 , "authors"
426 , "bcp14"
427 , "br"
428 , "call"
429 , "city"
430 , "code"
431 , "comment"
432 , "comments"
433 , "country"
434 , "date"
435 , "dd"
436 , "define"
437 , "del"
438 , "div"
439 , "dl"
440 , "document"
441 , "dt"
442 , "editor"
443 , "email"
444 , "embed"
445 , "eref"
446 , "fax"
447 , "feed"
448 , "feedback"
449 , "figure"
450 , "filter"
451 , "format"
452 , "from"
453 , "h"
454 , "hi"
455 , "html5"
456 , "i"
457 , "index"
458 , "iref"
459 , "keyword"
460 , "li"
461 , "link"
462 , "name"
463 , "note"
464 , "ol"
465 , "organization"
466 , "para"
467 , "postamble"
468 , "preamble"
469 , "q"
470 , "ref"
471 , "reference"
472 , "references"
473 , "region"
474 , "rref"
475 , "sc"
476 , "section"
477 , "serie"
478 , "source"
479 , "span"
480 , "street"
481 , "style"
482 , "sub"
483 , "sup"
484 , "table"
485 , "tbody"
486 , "td"
487 , "tel"
488 , "tfoot"
489 , "title"
490 , "th"
491 , "thead"
492 , "toc"
493 , "tof"
494 , "tr"
495 , "tt"
496 , "u"
497 , "ul"
498 , "uri"
499 , "version"
500 , "video"
501 , "workgroup"
502 , "xml"
503 , "zipcode"
504 ]
505
506 -- * Attributes
507
508 -- | Convenient alias, forcing the types
509 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
510 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
511
512 -- | Extract attributes
513 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
514 partitionAttrs ts = (attrs,cs)
515 where
516 (as,cs) = (`Seq.partition` ts) $ \case
517 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
518 _ -> False
519 attrs = attr <$> as
520 attr = \case
521 Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a ->
522 Cell bp ep (xmlLocalName n, v)
523 where
524 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
525 _ -> undefined
526
527 getAttrId :: Root -> TL.Text
528 getAttrId = Plain.document . Seq.singleton
529
530 setAttr ::
531 Cell (XmlName, TL.Text) ->
532 Seq (Cell (XmlName, TL.Text)) ->
533 Seq (Cell (XmlName, TL.Text))
534 setAttr a@(unCell -> (k, _v)) as =
535 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
536 Just idx -> Seq.update idx a as
537 Nothing -> a <| as
538
539 defaultAttr ::
540 Seq (Cell (XmlName, TL.Text)) ->
541 Cell (XmlName, TL.Text) ->
542 Seq (Cell (XmlName, TL.Text))
543 defaultAttr as a@(unCell -> (k, _v)) =
544 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
545 Just _idx -> as
546 Nothing -> a <| as
547
548 -- * Text
549
550 -- | Unify two 'XMLs', merging border 'XmlText's if any.
551 unionXml :: XMLs -> XMLs -> XMLs
552 unionXml x y =
553 case (Seq.viewr x, Seq.viewl y) of
554 (xs :> x0, y0 :< ys) ->
555 case (x0,y0) of
556 ( Tree0 (Cell bx ex (XmlText tx))
557 , Tree0 (Cell by ey (XmlText ty)) ) ->
558 xs `unionXml`
559 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell bx ex tx <> Cell by ey ty) `unionXml`
560 ys
561 _ -> x <> y
562 (Seq.EmptyR, _) -> y
563 (_, Seq.EmptyL) -> x
564
565 unionsXml :: Foldable f => f XMLs -> XMLs
566 unionsXml = foldl' unionXml mempty