]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Write/XML.hs
Add Majority Judgment support.
[doclang.git] / Hdoc / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Hdoc.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
22 import Data.Set (Set)
23 import Data.TreeSeq.Strict (Tree(..), tree0)
24 import Data.Tuple (uncurry)
25 import Prelude (Num(..), undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30 import qualified Hdoc.TCT.Write.Plain as Plain
31
32 -- import Hdoc.TCT.Debug
33 import Hdoc.TCT.Utils
34 import Hdoc.TCT hiding (Parser)
35 import Hdoc.XML
36 import Text.Blaze.XML ()
37
38 -- | Main entry point
39 --
40 -- NOTE: 'XmlNode' are still annotated with 'Cell',
41 -- but nothing is done to preserve any ordering amongst them,
42 -- because 'Node's sometimes need to be reordered
43 -- (eg. about/title may have a title from the section before,
44 -- hence outside of about).
45 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
46 writeXML :: Roots -> XMLs
47 writeXML doc =
48 -- (`S.evalState` def) $
49 case Seq.viewl doc of
50 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
51 let (titles, content) = partitionSection sec in
52 case Seq.viewl titles of
53 (unTree -> Cell st _) :< _ ->
54 xmlify def
55 { inh_titles = titles
56 , inh_figure = True
57 } contentWithAbout <>
58 xmlify def foot
59 where
60 contentWithAbout =
61 case Seq.findIndexL isAbout content of
62 Nothing -> Tree (Cell st $ NodeHeader $ HeaderColon "about" "") mempty <| content
63 Just{} -> content
64 isAbout = \case
65 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
66 _ -> False
67 _ -> xmlify def doc
68 _ -> xmlify def doc
69
70 -- * Type 'Inh'
71 data Inh
72 = Inh
73 { inh_figure :: Bool
74 , inh_para :: [Inh -> Root -> XML]
75 , inh_titles :: Roots
76 }
77 instance Default Inh where
78 def = Inh
79 { inh_figure = False
80 , inh_para = List.repeat elementPara
81 , inh_titles = mempty
82 }
83
84 -- ** 'inh_para'
85 elementPara :: Inh -> Root -> XML
86 elementPara inh (Tree c ts) = Tree (XmlElem "para" <$ c) $ xmlify inh ts
87
88 elementTitle :: Inh -> Root -> XML
89 elementTitle inh (Tree c ts) = Tree (XmlElem "title" <$ c) $ xmlify inh ts
90
91 elementName :: Inh -> Root -> XML
92 elementName inh (Tree c ts) = Tree (XmlElem "name" <$ c) $ xmlify inh ts
93
94 attributeName :: Inh -> Root -> XML
95 attributeName _inh (Tree c ts) = tree0 (XmlAttr "name" (Plain.writePlain ts) <$ c)
96
97 attributeId :: Inh -> Root -> XML
98 attributeId _inh (Tree c ts) = tree0 (XmlAttr "id" (Plain.writePlain ts) <$ c)
99
100 -- * Class 'Xmlify'
101 class Xmlify a where
102 xmlify :: Inh -> a -> XMLs
103 instance Xmlify Roots where
104 xmlify inh roots =
105 case Seq.viewl roots of
106 EmptyL -> mempty
107 r@(Tree cr@(Cell _sr nr) ts) :< rs ->
108 case nr of
109 ----------------------
110 -- NOTE: HeaderColon becomes parent
111 -- of any continuous following-sibling HeaderBar or HeaderGreat
112 NodeHeader (HeaderColon n _wh)
113 | (span, rest) <- spanlHeaderColon rs
114 , not $ null span ->
115 xmlify inh (Tree cr (ts<>span)) <>
116 xmlify inh rest
117 where
118 spanlHeaderColon :: Roots -> (Roots, Roots)
119 spanlHeaderColon =
120 Seq.spanl $ \case
121 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
122 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
123 _ -> False
124 ----------------------
125 -- NOTE: gather HeaderBrackets
126 NodeHeader HeaderBrackets{}
127 | (span,rest) <- spanlBrackets roots
128 , not (null span) ->
129 (<| xmlify inh rest) $
130 element "references" $
131 span >>= xmlify inh
132 where
133 spanlBrackets :: Roots -> (Roots, Roots)
134 spanlBrackets =
135 Seq.spanl $ \case
136 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
137 _ -> False
138 ----------------------
139 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
140 NodeText x
141 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
142 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
143 ----------------------
144 -- NOTE: detect [some text](http://some.url) or [SomeRef]
145 NodePair PairParen
146 | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
147 (<| xmlify inh rs') $
148 case bracket of
149 (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) ->
150 element "eref" $
151 xmlAttrs [Cell sl ("to",lnk)] <>
152 xmlify inh ts
153 _ ->
154 element "rref" $
155 xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <>
156 xmlify inh ts
157 ----------------------
158 -- NOTE: gather HeaderDash
159 _ | (span, rest) <- spanlItems (==HeaderDash) roots
160 , not $ null span ->
161 (<| xmlify inh rest) $
162 element "ul" $
163 span >>= xmlify inh{inh_para=List.repeat elementPara}
164 ----------------------
165 -- NOTE: gather HeaderDot
166 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
167 , not $ null span ->
168 (<| xmlify inh rest) $
169 element "ol" $
170 span >>= xmlify inh{inh_para=List.repeat elementPara}
171 where
172 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
173 spanlItems liHeader =
174 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
175 case nod of
176 NodeHeader (HeaderColon "li" _wh) -> True
177 NodeHeader hdr -> liHeader hdr
178 NodePair (PairElem "li" _as) -> True
179 _ -> False
180 ----------------------
181 NodePara | para:inh_para <- inh_para inh ->
182 para inh r <|
183 xmlify inh{inh_para} rs
184 ----------------------
185 -- NOTE: context-free Root
186 _ ->
187 xmlify inh r `unionXml`
188 xmlify inh rs
189 where
190 element :: XmlName -> XMLs -> XML
191 element n = Tree (XmlElem n <$ cr)
192 instance Xmlify Root where
193 xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) =
194 case nod of
195 ----------------------
196 NodePara ->
197 case inh_para inh of
198 [] -> xmlify inh ts
199 para:_ -> Seq.singleton $ para inh tn
200 ----------------------
201 NodeHeader hdr ->
202 case hdr of
203 --
204 HeaderSection{} ->
205 Seq.singleton $
206 element "section" $ head <> xmlify inh' body
207 where
208 (titles, content) = partitionSection tn
209 (attrs, body) = partitionAttrs content
210 head =
211 case Seq.viewl titles of
212 EmptyL -> mempty
213 title@(unTree -> ct) :< subtitles ->
214 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
215 xmlify inh{inh_para=List.repeat elementTitle} title <>
216 aliases
217 where
218 aliases =
219 subtitles >>= \subtitle@(unTree -> cs) ->
220 return $
221 Tree (cs $> XmlElem "alias") $
222 xmlAttrs [cs $> ("id",getAttrId subtitle)]
223 inh' = inh
224 { inh_para = List.repeat elementPara
225 , inh_figure = True
226 }
227 --
228 HeaderColon n _wh ->
229 let (attrs,body) = partitionAttrs ts in
230 case n of
231 -- NOTE: insert titles into <about>
232 "about" ->
233 Seq.singleton $
234 element "about" $
235 xmlify inh' (inh_titles inh) <>
236 xmlAttrs attrs <>
237 xmlify inh'{inh_figure=False} body
238 -- NOTE: handle judgment
239 _ | n`List.elem`elemsJudgment -> -- FIXME: not a special case so far.
240 Seq.singleton $
241 element (xmlLocalName n) $
242 xmlAttrs attrs <>
243 xmlify inh'' body
244 where
245 inh'' = inh'
246 { inh_para =
247 case n of
248 "grades" -> List.repeat attributeId
249 "judges" -> List.repeat attributeId
250 _ -> List.repeat elementTitle
251 }
252 -- NOTE: in <figure> mode, unreserved elements become <figure>
253 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
254 Seq.singleton $
255 element "figure" $
256 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
257 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <>
258 case toList body of
259 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
260 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
261 -- NOTE: reserved elements
262 _ ->
263 Seq.singleton $
264 element (xmlLocalName n) $
265 xmlAttrs attrs <>
266 xmlify inh' body
267 where
268 inh' = inh
269 { inh_para =
270 case n of
271 "about" -> List.repeat elementTitle
272 "reference" -> elementTitle : List.repeat elementPara
273 "serie" -> List.repeat attributeName
274 "author" -> List.repeat attributeName
275 "editor" -> List.repeat attributeName
276 "org" -> List.repeat attributeName
277 "note" -> List.repeat elementPara
278 _ -> []
279 }
280 --
281 HeaderBar n wh ->
282 if inh_figure inh && n`List.notElem`elems || TL.null n
283 then
284 Seq.singleton $
285 element "artwork" $
286 xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
287 xmlify inh{inh_para=[]} ts
288 else
289 xmlify inh $
290 Tree (cell $ NodeHeader $ HeaderColon n wh) ts
291 --
292 HeaderGreat n _wh ->
293 Seq.singleton $
294 let (attrs,body) = partitionAttrs ts in
295 element "quote" $
296 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
297 xmlify inh{inh_para=List.repeat elementPara} body
298 --
299 HeaderEqual n _wh ->
300 Seq.singleton $
301 Tree0 $ cell $ XmlAttr (xmlLocalName n) $
302 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
303 --
304 HeaderDot n ->
305 Seq.singleton $
306 element "li" $
307 let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in
308 xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <>
309 xmlify inh ts
310 --
311 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
312 --
313 HeaderDashDash ->
314 Seq.singleton $ Tree0 $ cell $
315 XmlComment $ Plain.writePlain ts
316 --
317 HeaderBrackets ident ->
318 let (attrs,body) = partitionAttrs ts in
319 Seq.singleton $
320 element "reference" $
321 xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) <>
322 xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body
323 where
324 inh' = inh{inh_figure = False}
325 --
326 HeaderDotSlash _file -> xmlify inh ts
327 ----------------------
328 NodePair pair ->
329 case pair of
330 PairBracket | to <- Plain.writePlain ts
331 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
332 Seq.singleton $
333 element "rref" $
334 xmlAttrs [cell ("to",to)]
335 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
336 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
337 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
338 PairFrenchquote ->
339 Seq.singleton $
340 element "q" $
341 case ts of
342 (Seq.viewl -> Tree0 (Cell sl (NodeToken (TokenText l))) :< ls) ->
343 case Seq.viewr ls of
344 m :> Tree0 (Cell sr (NodeToken (TokenText r))) ->
345 xmlify inh $
346 Tree0 (Cell sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
347 Seq.<|(m Seq.|>Tree0 (Cell sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r)))))
348 _ ->
349 xmlify inh $
350 Tree0 (Cell sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
351 (Seq.viewr -> rs :> Tree0 (Cell sr (NodeToken (TokenText r)))) ->
352 xmlify inh $
353 rs Seq.|> Tree0 (Cell sr (NodeToken (TokenText (TL.dropAround Char.isSpace r))))
354 _ -> xmlify inh ts
355 PairHash ->
356 Seq.singleton $
357 element "ref" $
358 xmlAttrs [cell ("to",Plain.writePlain ts)]
359 PairElem name attrs ->
360 Seq.singleton $
361 element (xmlLocalName name) $
362 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
363 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
364 xmlify inh ts
365 _ ->
366 Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XmlText open) `unionXml`
367 xmlify inh ts `unionXml`
368 Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XmlText close)
369 where
370 (open, close) = pairBorders pair ts
371 bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)}
372 en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)}
373 ----------------------
374 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
375 ----------------------
376 NodeToken tok ->
377 case tok of
378 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
379 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
380 TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)]
381 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
382 ----------------------
383 NodeLower n as ->
384 Seq.singleton $
385 element "artwork" $
386 xmlify inh ts
387 where
388 cell :: a -> Cell a
389 cell = Cell ss
390 element :: XmlName -> XMLs -> XML
391 element n = Tree (cell $ XmlElem n)
392 instance Xmlify (Seq (Cell (XmlName,TL.Text))) where
393 xmlify _inh = xmlAttrs
394
395 -- * Elements
396
397 -- | Reserved elements' name
398 elems :: Set TL.Text
399 elems =
400 [ "about"
401 , "abstract"
402 , "address"
403 , "alias"
404 , "annotation"
405 , "area"
406 , "artwork"
407 , "aside"
408 , "audio"
409 , "author"
410 , "authors"
411 , "bcp14"
412 , "br"
413 , "break"
414 , "call"
415 , "city"
416 , "code"
417 , "comment"
418 , "comments"
419 , "country"
420 , "date"
421 , "dd"
422 , "default"
423 , "define"
424 , "del"
425 , "div"
426 , "dl"
427 , "document"
428 , "dt"
429 , "editor"
430 , "email"
431 , "embed"
432 , "eref"
433 , "fax"
434 , "feed"
435 , "feedback"
436 , "figure"
437 , "filter"
438 , "format"
439 , "from"
440 , "h"
441 , "hi"
442 , "html5"
443 , "i"
444 , "index"
445 , "iref"
446 , "keyword"
447 , "li"
448 , "link"
449 , "name"
450 , "note"
451 , "ol"
452 , "organization"
453 , "para"
454 , "postamble"
455 , "preamble"
456 , "q"
457 , "ref"
458 , "reference"
459 , "references"
460 , "region"
461 , "rref"
462 , "sc"
463 , "section"
464 , "serie"
465 , "source"
466 , "span"
467 , "street"
468 , "style"
469 , "sub"
470 , "sup"
471 , "table"
472 , "tbody"
473 , "td"
474 , "tel"
475 , "tfoot"
476 , "th"
477 , "thead"
478 , "title"
479 , "toc"
480 , "tof"
481 , "tr"
482 , "tt"
483 , "u"
484 , "ul"
485 , "uri"
486 , "version"
487 , "video"
488 , "workgroup"
489 , "xml"
490 , "zipcode"
491 ]
492
493 elemsJudgment :: Set TL.Text
494 elemsJudgment =
495 [ "choice"
496 , "grade"
497 , "grades"
498 , "judge"
499 , "judges"
500 , "judgment"
501 , "opinion"
502 ]
503
504 -- * Attributes
505
506 -- | Convenient alias, forcing the types
507 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
508 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
509
510 -- | Extract section titles
511 partitionSection :: Root -> (Roots, Roots)
512 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
513 case Seq.viewl body of
514 EmptyL -> mempty
515 title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest ->
516 let (subtitles, content) = spanlSubtitles et rest in
517 (title <| (subtitles >>= subTrees), content)
518 where
519 spanlSubtitles ep ts =
520 case Seq.viewl ts of
521 sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
522 | lvlSub <= lvlPar
523 , pos_line span_begin - pos_line ep <= 1 ->
524 let (subs, ts') = spanlSubtitles span_end rs in
525 (sub <| subs, ts')
526 _ -> (mempty, ts)
527 _ -> (mempty, body)
528 partitionSection _ = mempty
529
530 -- | Extract attributes
531 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
532 partitionAttrs ts = (attrs,cs)
533 where
534 (as,cs) = (`Seq.partition` ts) $ \case
535 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
536 _ -> False
537 attrs = attr <$> as
538 attr = \case
539 Tree (Cell ssn (NodeHeader (HeaderEqual n _wh))) a ->
540 Cell ssn (xmlLocalName n, v)
541 where
542 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
543 _ -> undefined
544
545 getAttrId :: Root -> TL.Text
546 getAttrId = Plain.writePlain . Seq.singleton
547
548 setAttr ::
549 Cell (XmlName, TL.Text) ->
550 Seq (Cell (XmlName, TL.Text)) ->
551 Seq (Cell (XmlName, TL.Text))
552 setAttr a@(unCell -> (k, _v)) as =
553 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
554 Just idx -> Seq.update idx a as
555 Nothing -> a <| as
556
557 defaultAttr ::
558 Seq (Cell (XmlName, TL.Text)) ->
559 Cell (XmlName, TL.Text) ->
560 Seq (Cell (XmlName, TL.Text))
561 defaultAttr as a@(unCell -> (k, _v)) =
562 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
563 Just _idx -> as
564 Nothing -> a <| as
565
566 -- * Text
567
568 -- | Unify two 'XMLs', merging border 'XmlText's if any.
569 unionXml :: XMLs -> XMLs -> XMLs
570 unionXml x y =
571 case (Seq.viewr x, Seq.viewl y) of
572 (xs :> x0, y0 :< ys) ->
573 case (x0,y0) of
574 ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XmlText tx))
575 , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XmlText ty)) ) | fx == fy ->
576 xs `unionXml`
577 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml`
578 ys
579 _ -> x <> y
580 (Seq.EmptyR, _) -> y
581 (_, Seq.EmptyL) -> x
582
583 unionsXml :: Foldable f => f XMLs -> XMLs
584 unionsXml = foldl' unionXml mempty