]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Write/XML.hs
Move <judgment/> into <about/>.
[doclang.git] / Hdoc / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Hdoc.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
22 import Data.Set (Set)
23 import Data.TreeSeq.Strict (Tree(..), tree0)
24 import Data.Tuple (uncurry)
25 import Prelude (Num(..), undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30 import qualified Hdoc.TCT.Write.Plain as Plain
31
32 -- import Hdoc.TCT.Debug
33 import Hdoc.TCT.Utils
34 import Hdoc.TCT as TCT hiding (Parser)
35 import Hdoc.XML (XML, XMLs)
36 import qualified Hdoc.XML as XML
37 import Text.Blaze.XML ()
38
39 -- | Main entry point
40 --
41 -- NOTE: 'XmlNode' are still annotated with 'Cell',
42 -- but nothing is done to preserve any ordering amongst them,
43 -- because 'Node's sometimes need to be reordered
44 -- (eg. about/title may have a title from the section before,
45 -- hence outside of about).
46 writeXML :: Roots -> XMLs
47 writeXML (tn@(Tree (Cell ss (NodeHeader (HeaderSection 1))) _ts) Seq.:<| rs) =
48 element "head" (xmlifySection def tn) <|
49 xmlify def rs
50 where
51 element :: XML.Name -> XMLs -> XML
52 element n = Tree (Cell ss $ XML.NodeElem n)
53 writeXML roots = xmlify def roots
54
55 -- | Generate the content of <section> or <head>.
56 xmlifySection :: Inh -> Root -> XMLs
57 xmlifySection inh tn@(Tree (Cell ss _nt) _ts) =
58 about <>
59 xmlify inh' body
60 where
61 element :: XML.Name -> XMLs -> XML
62 element n = Tree (Cell ss $ XML.NodeElem n)
63 inh' = inh
64 { inh_para = List.repeat elementPara
65 , inh_figure = True
66 }
67 (titles, content) = partitionSection tn
68 (attrs, body) = partitionAttrs content
69 about =
70 case Seq.viewl titles of
71 EmptyL -> mempty
72 title@(unTree -> ct) :< subtitles ->
73 (xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>) $
74 Seq.singleton $
75 element "about" $
76 xmlify inh{inh_para=List.repeat elementTitle} title <>
77 aliases
78 where
79 aliases =
80 subtitles >>= \subtitle@(unTree -> cs) ->
81 return $
82 Tree (cs $> XML.NodeElem "alias") $
83 xmlAttrs [ct $> ("id",getAttrId subtitle)] <>
84 xmlify inh{inh_para=List.repeat elementTitle} subtitle
85
86 -- * Type 'Inh'
87 data Inh
88 = Inh
89 { inh_figure :: Bool
90 , inh_para :: [Inh -> Root -> XML]
91 }
92 instance Default Inh where
93 def = Inh
94 { inh_figure = False
95 , inh_para = List.repeat elementPara
96 }
97
98 -- ** 'inh_para'
99 elementPara :: Inh -> Root -> XML
100 elementPara inh (Tree c ts) = Tree (XML.NodeElem "para" <$ c) $ xmlify inh ts
101
102 elementTitle :: Inh -> Root -> XML
103 elementTitle inh (Tree c ts) = Tree (XML.NodeElem "title" <$ c) $ xmlify inh ts
104 elementTitleWith :: Attrs -> Inh -> Root -> XML
105 elementTitleWith attrs inh (Tree c ts) = Tree (XML.NodeElem "title" <$ c) $ xmlAttrs attrs <> xmlify inh ts
106
107 elementName :: Inh -> Root -> XML
108 elementName inh (Tree c ts) = Tree (XML.NodeElem "name" <$ c) $ xmlify inh ts
109
110 attributeName :: Inh -> Root -> XML
111 attributeName _inh (Tree c ts) = tree0 (XML.NodeAttr "name" (Plain.writePlain ts) <$ c)
112
113 attributeId :: Inh -> Root -> XML
114 attributeId _inh (Tree c ts) = tree0 (XML.NodeAttr "id" (Plain.writePlain ts) <$ c)
115
116 -- * Class 'Xmlify'
117 class Xmlify a where
118 xmlify :: Inh -> a -> XMLs
119 instance Xmlify Roots where
120 xmlify inh roots =
121 case Seq.viewl roots of
122 EmptyL -> mempty
123 r@(Tree cr@(Cell _sr nr) ts) :< rs ->
124 case nr of
125 ----------------------
126 -- NOTE: HeaderColon becomes parent
127 -- of any continuous following-sibling HeaderBar or HeaderGreat
128 NodeHeader (HeaderColon n _wh)
129 | (span, rest) <- spanlHeaderColon rs
130 , not $ null span ->
131 xmlify inh (Tree cr (ts<>span)) <>
132 xmlify inh rest
133 where
134 spanlHeaderColon :: Roots -> (Roots, Roots)
135 spanlHeaderColon =
136 Seq.spanl $ \case
137 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
138 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
139 _ -> False
140 ----------------------
141 -- NOTE: gather HeaderBrackets
142 NodeHeader HeaderBrackets{}
143 | (span,rest) <- spanlBrackets roots
144 , not (null span) ->
145 (<| xmlify inh rest) $
146 element "references" $
147 span >>= xmlify inh
148 where
149 spanlBrackets :: Roots -> (Roots, Roots)
150 spanlBrackets =
151 Seq.spanl $ \case
152 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
153 _ -> False
154 ----------------------
155 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
156 NodeText x
157 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
158 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
159 ----------------------
160 -- NOTE: detect (some text)[http://some.url] or (some text)[SomeRef]
161 NodePair PairParen
162 | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
163 (<| xmlify inh rs') $
164 case bracket of
165 (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) ->
166 element "eref" $
167 xmlAttrs [Cell sl ("to",lnk)] <>
168 xmlify inh ts
169 _ ->
170 element "ref" $
171 xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <>
172 if null ts -- NOTE: preserve empty parens
173 then Seq.singleton $ tree0 (XML.NodeText "" <$ cr)
174 else xmlify inh ts
175 ----------------------
176 -- NOTE: gather HeaderDash
177 _ | (span, rest) <- spanlItems (==HeaderDash) roots
178 , not $ null span ->
179 (<| xmlify inh rest) $
180 element "ul" $
181 span >>= xmlify inh{inh_para=List.repeat elementPara}
182 ----------------------
183 -- NOTE: gather HeaderDot
184 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
185 , not $ null span ->
186 (<| xmlify inh rest) $
187 element "ol" $
188 span >>= xmlify inh{inh_para=List.repeat elementPara}
189 where
190 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
191 spanlItems liHeader =
192 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
193 case nod of
194 NodeHeader (HeaderColon "li" _wh) -> True
195 NodeHeader hdr -> liHeader hdr
196 NodePair (PairElem "li" _as) -> True
197 _ -> False
198 ----------------------
199 NodePara | para:inh_para <- inh_para inh ->
200 para inh r <|
201 xmlify inh{inh_para} rs
202 ----------------------
203 -- NOTE: context-free Root
204 _ ->
205 xmlify inh r `unionXml`
206 xmlify inh rs
207 where
208 element :: XML.Name -> XMLs -> XML
209 element n = Tree (XML.NodeElem n <$ cr)
210 instance Xmlify Root where
211 xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) =
212 case nod of
213 ----------------------
214 NodePara ->
215 case inh_para inh of
216 [] -> xmlify inh ts
217 para:_ -> Seq.singleton $ para inh tn
218 ----------------------
219 NodeHeader hdr ->
220 case hdr of
221 --
222 HeaderSection{} ->
223 Seq.singleton $
224 element "section" $
225 xmlifySection inh tn
226 --
227 HeaderColon n _wh ->
228 let (attrs,body) = partitionAttrs ts in
229 case n of
230 -- NOTE: disable 'inh_figure'
231 "about" ->
232 Seq.singleton $
233 element "about" $
234 xmlAttrs attrs <>
235 xmlify inh'{inh_figure=False} body
236 -- NOTE: handle judgment
237 _ | n`List.elem`elemsJudgment -> -- FIXME: not a special case so far.
238 Seq.singleton $
239 element (XML.localName n) $
240 xmlAttrs attrs <>
241 xmlify inh'' body
242 where
243 inh'' = inh'
244 { inh_para =
245 case n of
246 "grades" -> List.repeat attributeId
247 "judges" -> List.repeat attributeId
248 _ -> List.repeat elementTitle
249 }
250 -- NOTE: in <figure> mode, unreserved elements become <figure>
251 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
252 Seq.singleton $
253 element "figure" $
254 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
255 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <>
256 case toList body of
257 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
258 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
259 -- NOTE: reserved elements
260 _ ->
261 Seq.singleton $
262 element (XML.localName n) $
263 xmlAttrs attrs <>
264 xmlify inh' body
265 where
266 inh' = inh
267 { inh_para =
268 case n of
269 "about" -> List.repeat elementTitle
270 "reference" -> List.repeat elementTitle
271 "serie" -> List.repeat attributeName
272 "author" -> List.repeat attributeName
273 "editor" -> List.repeat attributeName
274 "org" -> List.repeat attributeName
275 "note" -> List.repeat elementPara
276 _ -> []
277 }
278 --
279 HeaderBar n wh ->
280 if inh_figure inh && n`List.notElem`elems || TL.null n
281 then
282 Seq.singleton $
283 element "artwork" $
284 xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
285 xmlify inh{inh_para=[]} ts
286 else
287 xmlify inh $
288 Tree (cell $ NodeHeader $ HeaderColon n wh) ts
289 --
290 HeaderGreat n _wh ->
291 Seq.singleton $
292 let (attrs,body) = partitionAttrs ts in
293 element "quote" $
294 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
295 xmlify inh{inh_para=List.repeat elementPara} body
296 --
297 HeaderEqual n _wh ->
298 Seq.singleton $
299 Tree0 $ cell $ XML.NodeAttr (XML.localName n) $
300 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
301 --
302 HeaderDot n ->
303 Seq.singleton $
304 element "li" $
305 let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in
306 xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <>
307 xmlify inh ts
308 --
309 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
310 --
311 HeaderDashDash ->
312 Seq.singleton $ Tree0 $ cell $
313 XML.NodeComment $ Plain.writePlain ts
314 --
315 HeaderBrackets ident ->
316 let (attrs,body) = partitionAttrs ts in
317 Seq.singleton $
318 element "reference" $
319 xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) |>
320 element "about" (
321 xmlify inh'{inh_para = List.repeat elementTitle} body
322 )
323 where
324 inh' = inh{inh_figure = False}
325 --
326 HeaderDotSlash _file -> xmlify inh ts
327 ----------------------
328 NodePair pair ->
329 case pair of
330 PairBracket | to <- Plain.writePlain ts
331 , TL.all (\c -> c/='[' && c/=']' && Char.isPrint c && not (Char.isSpace c)) to ->
332 Seq.singleton $
333 element "ref" $
334 xmlAttrs [cell ("to",to)]
335 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
336 PairDash -> Seq.singleton $ element "del" $ xmlify inh ts
337 PairUnderscore -> Seq.singleton $ element "u" $ xmlify inh ts
338 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
339 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
340 PairFrenchquote ->
341 Seq.singleton $
342 element "q" $
343 case ts of
344 (Seq.viewl -> Tree0 (Cell sl (NodeToken (TokenText l))) :< ls) ->
345 case Seq.viewr ls of
346 m :> Tree0 (Cell sr (NodeToken (TokenText r))) ->
347 xmlify inh $
348 Tree0 (Cell sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
349 Seq.<|(m Seq.|>Tree0 (Cell sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r)))))
350 _ ->
351 xmlify inh $
352 Tree0 (Cell sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
353 (Seq.viewr -> rs :> Tree0 (Cell sr (NodeToken (TokenText r)))) ->
354 xmlify inh $
355 rs Seq.|> Tree0 (Cell sr (NodeToken (TokenText (TL.dropAround Char.isSpace r))))
356 _ -> xmlify inh ts
357 PairTag isBackref ->
358 Seq.singleton $
359 element (if isBackref then "tag-back" else "tag") $
360 xmlAttrs [cell ("to",Plain.writePlain ts)]
361 -- xmlAttrs [cell ("to",to)]
362 -- xmlify inh{inh_para=[]} ts
363 -- xmlAttrs [cell ("to",Plain.writePlain ts)]
364 PairAt isBackref ->
365 Seq.singleton $
366 element (if isBackref then "at-back" else "at") $
367 xmlAttrs [cell ("to",Plain.writePlain ts)]
368 PairElem name attrs ->
369 Seq.singleton $
370 element (XML.localName name) $
371 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
372 cell (XML.localName elemAttr_name,elemAttr_value)) <$> attrs) <>
373 xmlify inh ts
374 _ ->
375 Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XML.NodeText open) `unionXml`
376 xmlify inh ts `unionXml`
377 Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XML.NodeText close)
378 where
379 (open, close) = pairBorders pair ts
380 bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)}
381 en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)}
382 ----------------------
383 NodeText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t
384 ----------------------
385 NodeToken tok ->
386 case tok of
387 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XML.NodeText $ TL.singleton c
388 TokenText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t
389 TokenAt b to -> Seq.singleton $ element (if b then "at-back" else "at") $
390 xmlAttrs [cell ("to",to)] -- Seq.singleton $ Tree0 $ cell $ XML.NodeText to
391 TokenTag b to -> Seq.singleton $ element (if b then "tag-back" else "tag") $
392 xmlAttrs [cell ("to",to)] -- Seq.singleton $ Tree0 $ cell $ XML.NodeText to
393 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
394 ----------------------
395 NodeLower n as ->
396 Seq.singleton $
397 element "artwork" $
398 xmlify inh ts
399 where
400 cell :: a -> Cell a
401 cell = Cell ss
402 element :: XML.Name -> XMLs -> XML
403 element n = Tree (cell $ XML.NodeElem n)
404 {-
405 instance Xmlify (Seq (Cell (XML.Name,TL.Text))) where
406 xmlify _inh = xmlAttrs
407 -}
408
409 -- * Elements
410
411 -- | Reserved elements' name
412 elems :: Set TL.Text -- TODO: use a @data@ or maybe a @data family@ instead of 'TL.Text'
413 elems =
414 [ "about"
415 , "abstract"
416 , "address"
417 , "alias"
418 , "annotation"
419 , "area"
420 , "artwork"
421 , "aside"
422 , "at"
423 , "at-back"
424 , "audio"
425 , "author"
426 , "authors"
427 , "bcp14"
428 , "br"
429 , "break"
430 , "call"
431 , "city"
432 , "code"
433 , "comment"
434 , "comments"
435 , "country"
436 , "date"
437 , "dd"
438 , "default"
439 , "define"
440 , "del"
441 , "div"
442 , "dl"
443 , "document"
444 , "dt"
445 , "editor"
446 , "email"
447 , "embed"
448 , "eref"
449 , "fax"
450 , "feed"
451 , "feedback"
452 , "figure"
453 , "filter"
454 , "format"
455 , "from"
456 , "h"
457 , "head"
458 , "hi"
459 , "html5"
460 , "i"
461 , "index"
462 , "iref"
463 , "keyword"
464 , "li"
465 , "link"
466 , "name"
467 , "note"
468 , "ol"
469 , "organization"
470 , "para"
471 , "postamble"
472 , "preamble"
473 , "q"
474 , "ref"
475 , "reference"
476 , "references"
477 , "refs"
478 , "region"
479 , "sc"
480 , "section"
481 , "serie"
482 , "source"
483 , "span"
484 , "street"
485 , "style"
486 , "sub"
487 , "sup"
488 , "table"
489 , "tag"
490 , "tag-back"
491 , "tbody"
492 , "td"
493 , "tel"
494 , "tfoot"
495 , "th"
496 , "thead"
497 , "title"
498 , "toc"
499 , "tof"
500 , "tr"
501 , "tt"
502 , "u"
503 , "ul"
504 , "uri"
505 , "version"
506 , "video"
507 , "workgroup"
508 , "xml"
509 , "zipcode"
510 ]
511
512 elemsJudgment :: Set TL.Text
513 elemsJudgment =
514 [ "choice"
515 , "grade"
516 , "grades"
517 , "judge"
518 , "judges"
519 , "judgment"
520 , "opinion"
521 ]
522
523 -- * Attributes
524
525 type Attrs = Seq (Cell (XML.Name, TL.Text))
526
527 -- | Convenient alias, forcing the types
528 xmlAttrs :: Attrs -> XMLs
529 xmlAttrs = (Tree0 . (uncurry XML.NodeAttr <$>) <$>)
530
531 -- | Extract section titles
532 partitionSection :: Root -> (Roots, Roots)
533 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
534 case Seq.viewl body of
535 EmptyL -> mempty
536 title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest ->
537 let (subtitles, content) = spanlSubtitles et rest in
538 (title <| (subtitles >>= subTrees), content)
539 where
540 spanlSubtitles ep ts =
541 case Seq.viewl ts of
542 sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
543 | lvlSub <= lvlPar
544 , pos_line span_begin - pos_line ep <= 1 ->
545 let (subs, ts') = spanlSubtitles span_end rs in
546 (sub <| subs, ts')
547 _ -> (mempty, ts)
548 _ -> (mempty, body)
549 partitionSection _ = mempty
550
551 -- | Extract attributes
552 partitionAttrs :: Roots -> (Seq (Cell (XML.Name, TL.Text)), Roots)
553 partitionAttrs ts = (attrs,cs)
554 where
555 (as,cs) = (`Seq.partition` ts) $ \case
556 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
557 _ -> False
558 attrs = attr <$> as
559 attr = \case
560 Tree (Cell loc (NodeHeader (HeaderEqual n _wh))) a ->
561 Cell loc (XML.localName n, v)
562 where
563 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
564 _ -> undefined
565
566 getAttrId :: Root -> TL.Text
567 getAttrId = Plain.writePlain . Seq.singleton
568
569 setAttr ::
570 Cell (XML.Name, TL.Text) ->
571 Seq (Cell (XML.Name, TL.Text)) ->
572 Seq (Cell (XML.Name, TL.Text))
573 setAttr a@(unCell -> (k, _v)) as =
574 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
575 Just idx -> Seq.update idx a as
576 Nothing -> a <| as
577
578 defaultAttr ::
579 Seq (Cell (XML.Name, TL.Text)) ->
580 Cell (XML.Name, TL.Text) ->
581 Seq (Cell (XML.Name, TL.Text))
582 defaultAttr as a@(unCell -> (k, _v)) =
583 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
584 Just _idx -> as
585 Nothing -> a <| as
586
587 -- * Text
588
589 -- | Unify two 'XMLs', merging border 'XML.NodeText's if any.
590 unionXml :: XMLs -> XMLs -> XMLs
591 unionXml x y =
592 case (Seq.viewr x, Seq.viewl y) of
593 (xs :> x0, y0 :< ys) ->
594 case (x0,y0) of
595 ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XML.NodeText tx))
596 , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XML.NodeText ty)) ) | fx == fy ->
597 xs `unionXml`
598 Seq.singleton (Tree0 $ (XML.NodeText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml`
599 ys
600 _ -> x <> y
601 (Seq.EmptyR, _) -> y
602 (_, Seq.EmptyL) -> x
603
604 unionsXml :: Foldable f => f XMLs -> XMLs
605 unionsXml = foldl' unionXml mempty