]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Write/XML.hs
Add PairAt, TokenAt and PlainAt.
[doclang.git] / Hdoc / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Hdoc.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
22 import Data.Set (Set)
23 import Data.TreeSeq.Strict (Tree(..), tree0)
24 import Data.Tuple (uncurry)
25 import Prelude (Num(..), undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30 import qualified Hdoc.TCT.Write.Plain as Plain
31
32 -- import Hdoc.TCT.Debug
33 import Hdoc.TCT.Utils
34 import Hdoc.TCT as TCT hiding (Parser)
35 import Hdoc.XML (XML, XMLs)
36 import qualified Hdoc.XML as XML
37 import Text.Blaze.XML ()
38
39 -- | Main entry point
40 --
41 -- NOTE: 'XmlNode' are still annotated with 'Cell',
42 -- but nothing is done to preserve any ordering amongst them,
43 -- because 'Node's sometimes need to be reordered
44 -- (eg. about/title may have a title from the section before,
45 -- hence outside of about).
46 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
47 writeXML :: Roots -> XMLs
48 writeXML doc =
49 -- (`S.evalState` def) $
50 case Seq.viewl doc of
51 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
52 let (titles, content) = partitionSection sec in
53 case Seq.viewl titles of
54 (unTree -> Cell st _) :< _ ->
55 xmlify def
56 { inh_titles = titles
57 , inh_figure = True
58 } contentWithAbout <>
59 xmlify def foot
60 where
61 contentWithAbout =
62 case Seq.findIndexL isAbout content of
63 Nothing -> Tree (Cell st $ NodeHeader $ HeaderColon "about" "") mempty <| content
64 Just{} -> content
65 isAbout = \case
66 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
67 _ -> False
68 _ -> xmlify def doc
69 _ -> xmlify def doc
70
71 -- * Type 'Inh'
72 data Inh
73 = Inh
74 { inh_figure :: Bool
75 , inh_para :: [Inh -> Root -> XML]
76 , inh_titles :: Roots
77 }
78 instance Default Inh where
79 def = Inh
80 { inh_figure = False
81 , inh_para = List.repeat elementPara
82 , inh_titles = mempty
83 }
84
85 -- ** 'inh_para'
86 elementPara :: Inh -> Root -> XML
87 elementPara inh (Tree c ts) = Tree (XML.NodeElem "para" <$ c) $ xmlify inh ts
88
89 elementTitle :: Inh -> Root -> XML
90 elementTitle inh (Tree c ts) = Tree (XML.NodeElem "title" <$ c) $ xmlify inh ts
91
92 elementName :: Inh -> Root -> XML
93 elementName inh (Tree c ts) = Tree (XML.NodeElem "name" <$ c) $ xmlify inh ts
94
95 attributeName :: Inh -> Root -> XML
96 attributeName _inh (Tree c ts) = tree0 (XML.NodeAttr "name" (Plain.writePlain ts) <$ c)
97
98 attributeId :: Inh -> Root -> XML
99 attributeId _inh (Tree c ts) = tree0 (XML.NodeAttr "id" (Plain.writePlain ts) <$ c)
100
101 -- * Class 'Xmlify'
102 class Xmlify a where
103 xmlify :: Inh -> a -> XMLs
104 instance Xmlify Roots where
105 xmlify inh roots =
106 case Seq.viewl roots of
107 EmptyL -> mempty
108 r@(Tree cr@(Cell _sr nr) ts) :< rs ->
109 case nr of
110 ----------------------
111 -- NOTE: HeaderColon becomes parent
112 -- of any continuous following-sibling HeaderBar or HeaderGreat
113 NodeHeader (HeaderColon n _wh)
114 | (span, rest) <- spanlHeaderColon rs
115 , not $ null span ->
116 xmlify inh (Tree cr (ts<>span)) <>
117 xmlify inh rest
118 where
119 spanlHeaderColon :: Roots -> (Roots, Roots)
120 spanlHeaderColon =
121 Seq.spanl $ \case
122 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
123 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
124 _ -> False
125 ----------------------
126 -- NOTE: gather HeaderBrackets
127 NodeHeader HeaderBrackets{}
128 | (span,rest) <- spanlBrackets roots
129 , not (null span) ->
130 (<| xmlify inh rest) $
131 element "references" $
132 span >>= xmlify inh
133 where
134 spanlBrackets :: Roots -> (Roots, Roots)
135 spanlBrackets =
136 Seq.spanl $ \case
137 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
138 _ -> False
139 ----------------------
140 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
141 NodeText x
142 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
143 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
144 ----------------------
145 -- NOTE: detect (some text)[http://some.url] or (some text)[SomeRef]
146 NodePair PairParen
147 | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
148 (<| xmlify inh rs') $
149 case bracket of
150 (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) ->
151 element "eref" $
152 xmlAttrs [Cell sl ("to",lnk)] <>
153 xmlify inh ts
154 _ ->
155 element "ref" $
156 xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <>
157 if null ts -- NOTE: preserve empty parens
158 then Seq.singleton $ tree0 (XML.NodeText "" <$ cr)
159 else xmlify inh ts
160 ----------------------
161 -- NOTE: gather HeaderDash
162 _ | (span, rest) <- spanlItems (==HeaderDash) roots
163 , not $ null span ->
164 (<| xmlify inh rest) $
165 element "ul" $
166 span >>= xmlify inh{inh_para=List.repeat elementPara}
167 ----------------------
168 -- NOTE: gather HeaderDot
169 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
170 , not $ null span ->
171 (<| xmlify inh rest) $
172 element "ol" $
173 span >>= xmlify inh{inh_para=List.repeat elementPara}
174 where
175 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
176 spanlItems liHeader =
177 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
178 case nod of
179 NodeHeader (HeaderColon "li" _wh) -> True
180 NodeHeader hdr -> liHeader hdr
181 NodePair (PairElem "li" _as) -> True
182 _ -> False
183 ----------------------
184 NodePara | para:inh_para <- inh_para inh ->
185 para inh r <|
186 xmlify inh{inh_para} rs
187 ----------------------
188 -- NOTE: context-free Root
189 _ ->
190 xmlify inh r `unionXml`
191 xmlify inh rs
192 where
193 element :: XML.Name -> XMLs -> XML
194 element n = Tree (XML.NodeElem n <$ cr)
195 instance Xmlify Root where
196 xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) =
197 case nod of
198 ----------------------
199 NodePara ->
200 case inh_para inh of
201 [] -> xmlify inh ts
202 para:_ -> Seq.singleton $ para inh tn
203 ----------------------
204 NodeHeader hdr ->
205 case hdr of
206 --
207 HeaderSection{} ->
208 Seq.singleton $
209 element "section" $ head <> xmlify inh' body
210 where
211 (titles, content) = partitionSection tn
212 (attrs, body) = partitionAttrs content
213 head =
214 case Seq.viewl titles of
215 EmptyL -> mempty
216 title@(unTree -> ct) :< subtitles ->
217 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
218 xmlify inh{inh_para=List.repeat elementTitle} title <>
219 aliases
220 where
221 aliases =
222 subtitles >>= \subtitle@(unTree -> cs) ->
223 return $
224 Tree (cs $> XML.NodeElem "alias") $
225 xmlAttrs (Seq.singleton $ ct $> ("id",getAttrId subtitle)) <>
226 xmlify inh{inh_para=List.repeat elementTitle} subtitle
227 inh' = inh
228 { inh_para = List.repeat elementPara
229 , inh_figure = True
230 }
231 --
232 HeaderColon n _wh ->
233 let (attrs,body) = partitionAttrs ts in
234 case n of
235 -- NOTE: insert titles into <about>
236 "about" ->
237 Seq.singleton $
238 element "about" $
239 xmlify inh' (inh_titles inh) <>
240 xmlAttrs attrs <>
241 xmlify inh'{inh_figure=False} body
242 -- NOTE: handle judgment
243 _ | n`List.elem`elemsJudgment -> -- FIXME: not a special case so far.
244 Seq.singleton $
245 element (XML.localName n) $
246 xmlAttrs attrs <>
247 xmlify inh'' body
248 where
249 inh'' = inh'
250 { inh_para =
251 case n of
252 "grades" -> List.repeat attributeId
253 "judges" -> List.repeat attributeId
254 _ -> List.repeat elementTitle
255 }
256 -- NOTE: in <figure> mode, unreserved elements become <figure>
257 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
258 Seq.singleton $
259 element "figure" $
260 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
261 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <>
262 case toList body of
263 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
264 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
265 -- NOTE: reserved elements
266 _ ->
267 Seq.singleton $
268 element (XML.localName n) $
269 xmlAttrs attrs <>
270 xmlify inh' body
271 where
272 inh' = inh
273 { inh_para =
274 case n of
275 "about" -> List.repeat elementTitle
276 "reference" -> elementTitle : List.repeat elementPara
277 "serie" -> List.repeat attributeName
278 "author" -> List.repeat attributeName
279 "editor" -> List.repeat attributeName
280 "org" -> List.repeat attributeName
281 "note" -> List.repeat elementPara
282 _ -> []
283 }
284 --
285 HeaderBar n wh ->
286 if inh_figure inh && n`List.notElem`elems || TL.null n
287 then
288 Seq.singleton $
289 element "artwork" $
290 xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
291 xmlify inh{inh_para=[]} ts
292 else
293 xmlify inh $
294 Tree (cell $ NodeHeader $ HeaderColon n wh) ts
295 --
296 HeaderGreat n _wh ->
297 Seq.singleton $
298 let (attrs,body) = partitionAttrs ts in
299 element "quote" $
300 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
301 xmlify inh{inh_para=List.repeat elementPara} body
302 --
303 HeaderEqual n _wh ->
304 Seq.singleton $
305 Tree0 $ cell $ XML.NodeAttr (XML.localName n) $
306 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
307 --
308 HeaderDot n ->
309 Seq.singleton $
310 element "li" $
311 let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in
312 xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <>
313 xmlify inh ts
314 --
315 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
316 --
317 HeaderDashDash ->
318 Seq.singleton $ Tree0 $ cell $
319 XML.NodeComment $ Plain.writePlain ts
320 --
321 HeaderBrackets ident ->
322 let (attrs,body) = partitionAttrs ts in
323 Seq.singleton $
324 element "reference" $
325 xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) <>
326 xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body
327 where
328 inh' = inh{inh_figure = False}
329 --
330 HeaderDotSlash _file -> xmlify inh ts
331 ----------------------
332 NodePair pair ->
333 case pair of
334 PairBracket | to <- Plain.writePlain ts
335 , TL.all (\c -> c/='[' && c/=']' && Char.isPrint c && not (Char.isSpace c)) to ->
336 Seq.singleton $
337 element "ref" $
338 xmlAttrs [cell ("to",to)]
339 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
340 PairDash -> Seq.singleton $ element "del" $ xmlify inh ts
341 PairUnderscore -> Seq.singleton $ element "u" $ xmlify inh ts
342 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
343 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
344 PairFrenchquote ->
345 Seq.singleton $
346 element "q" $
347 case ts of
348 (Seq.viewl -> Tree0 (Cell sl (NodeToken (TokenText l))) :< ls) ->
349 case Seq.viewr ls of
350 m :> Tree0 (Cell sr (NodeToken (TokenText r))) ->
351 xmlify inh $
352 Tree0 (Cell sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
353 Seq.<|(m Seq.|>Tree0 (Cell sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r)))))
354 _ ->
355 xmlify inh $
356 Tree0 (Cell sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
357 (Seq.viewr -> rs :> Tree0 (Cell sr (NodeToken (TokenText r)))) ->
358 xmlify inh $
359 rs Seq.|> Tree0 (Cell sr (NodeToken (TokenText (TL.dropAround Char.isSpace r))))
360 _ -> xmlify inh ts
361 PairTag isBackref ->
362 Seq.singleton $
363 element (if isBackref then "tag-back" else "tag") $
364 xmlAttrs [cell ("to",Plain.writePlain ts)]
365 -- xmlAttrs [cell ("to",to)]
366 -- xmlify inh{inh_para=[]} ts
367 -- xmlAttrs [cell ("to",Plain.writePlain ts)]
368 PairAt isBackref ->
369 Seq.singleton $
370 element (if isBackref then "at-back" else "at") $
371 xmlAttrs [cell ("to",Plain.writePlain ts)]
372 PairElem name attrs ->
373 Seq.singleton $
374 element (XML.localName name) $
375 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
376 cell (XML.localName elemAttr_name,elemAttr_value)) <$> attrs) <>
377 xmlify inh ts
378 _ ->
379 Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XML.NodeText open) `unionXml`
380 xmlify inh ts `unionXml`
381 Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XML.NodeText close)
382 where
383 (open, close) = pairBorders pair ts
384 bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)}
385 en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)}
386 ----------------------
387 NodeText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t
388 ----------------------
389 NodeToken tok ->
390 case tok of
391 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XML.NodeText $ TL.singleton c
392 TokenText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t
393 TokenAt b to -> Seq.singleton $ element (if b then "at-back" else "at") $
394 xmlAttrs [cell ("to",to)] -- Seq.singleton $ Tree0 $ cell $ XML.NodeText to
395 TokenTag b to -> Seq.singleton $ element (if b then "tag-back" else "tag") $
396 xmlAttrs [cell ("to",to)] -- Seq.singleton $ Tree0 $ cell $ XML.NodeText to
397 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
398 ----------------------
399 NodeLower n as ->
400 Seq.singleton $
401 element "artwork" $
402 xmlify inh ts
403 where
404 cell :: a -> Cell a
405 cell = Cell ss
406 element :: XML.Name -> XMLs -> XML
407 element n = Tree (cell $ XML.NodeElem n)
408 instance Xmlify (Seq (Cell (XML.Name,TL.Text))) where
409 xmlify _inh = xmlAttrs
410
411 -- * Elements
412
413 -- | Reserved elements' name
414 elems :: Set TL.Text
415 elems =
416 [ "about"
417 , "abstract"
418 , "address"
419 , "alias"
420 , "annotation"
421 , "area"
422 , "artwork"
423 , "aside"
424 , "at"
425 , "at-back"
426 , "audio"
427 , "author"
428 , "authors"
429 , "bcp14"
430 , "br"
431 , "break"
432 , "call"
433 , "city"
434 , "code"
435 , "comment"
436 , "comments"
437 , "country"
438 , "date"
439 , "dd"
440 , "default"
441 , "define"
442 , "del"
443 , "div"
444 , "dl"
445 , "document"
446 , "dt"
447 , "editor"
448 , "email"
449 , "embed"
450 , "eref"
451 , "fax"
452 , "feed"
453 , "feedback"
454 , "figure"
455 , "filter"
456 , "format"
457 , "from"
458 , "h"
459 , "hi"
460 , "html5"
461 , "i"
462 , "index"
463 , "iref"
464 , "keyword"
465 , "li"
466 , "link"
467 , "name"
468 , "note"
469 , "ol"
470 , "organization"
471 , "para"
472 , "postamble"
473 , "preamble"
474 , "q"
475 , "ref"
476 , "reference"
477 , "references"
478 , "refs"
479 , "region"
480 , "sc"
481 , "section"
482 , "serie"
483 , "source"
484 , "span"
485 , "street"
486 , "style"
487 , "sub"
488 , "sup"
489 , "table"
490 , "tag"
491 , "tag-back"
492 , "tbody"
493 , "td"
494 , "tel"
495 , "tfoot"
496 , "th"
497 , "thead"
498 , "title"
499 , "toc"
500 , "tof"
501 , "tr"
502 , "tt"
503 , "u"
504 , "ul"
505 , "uri"
506 , "version"
507 , "video"
508 , "workgroup"
509 , "xml"
510 , "zipcode"
511 ]
512
513 elemsJudgment :: Set TL.Text
514 elemsJudgment =
515 [ "choice"
516 , "grade"
517 , "grades"
518 , "judge"
519 , "judges"
520 , "judgment"
521 , "opinion"
522 ]
523
524 -- * Attributes
525
526 -- | Convenient alias, forcing the types
527 xmlAttrs :: Seq (Cell (XML.Name,TL.Text)) -> XMLs
528 xmlAttrs = (Tree0 . (uncurry XML.NodeAttr <$>) <$>)
529
530 -- | Extract section titles
531 partitionSection :: Root -> (Roots, Roots)
532 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
533 case Seq.viewl body of
534 EmptyL -> mempty
535 title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest ->
536 let (subtitles, content) = spanlSubtitles et rest in
537 (title <| (subtitles >>= subTrees), content)
538 where
539 spanlSubtitles ep ts =
540 case Seq.viewl ts of
541 sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
542 | lvlSub <= lvlPar
543 , pos_line span_begin - pos_line ep <= 1 ->
544 let (subs, ts') = spanlSubtitles span_end rs in
545 (sub <| subs, ts')
546 _ -> (mempty, ts)
547 _ -> (mempty, body)
548 partitionSection _ = mempty
549
550 -- | Extract attributes
551 partitionAttrs :: Roots -> (Seq (Cell (XML.Name, TL.Text)), Roots)
552 partitionAttrs ts = (attrs,cs)
553 where
554 (as,cs) = (`Seq.partition` ts) $ \case
555 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
556 _ -> False
557 attrs = attr <$> as
558 attr = \case
559 Tree (Cell loc (NodeHeader (HeaderEqual n _wh))) a ->
560 Cell loc (XML.localName n, v)
561 where
562 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
563 _ -> undefined
564
565 getAttrId :: Root -> TL.Text
566 getAttrId = Plain.writePlain . Seq.singleton
567
568 setAttr ::
569 Cell (XML.Name, TL.Text) ->
570 Seq (Cell (XML.Name, TL.Text)) ->
571 Seq (Cell (XML.Name, TL.Text))
572 setAttr a@(unCell -> (k, _v)) as =
573 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
574 Just idx -> Seq.update idx a as
575 Nothing -> a <| as
576
577 defaultAttr ::
578 Seq (Cell (XML.Name, TL.Text)) ->
579 Cell (XML.Name, TL.Text) ->
580 Seq (Cell (XML.Name, TL.Text))
581 defaultAttr as a@(unCell -> (k, _v)) =
582 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
583 Just _idx -> as
584 Nothing -> a <| as
585
586 -- * Text
587
588 -- | Unify two 'XMLs', merging border 'XML.NodeText's if any.
589 unionXml :: XMLs -> XMLs -> XMLs
590 unionXml x y =
591 case (Seq.viewr x, Seq.viewl y) of
592 (xs :> x0, y0 :< ys) ->
593 case (x0,y0) of
594 ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XML.NodeText tx))
595 , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XML.NodeText ty)) ) | fx == fy ->
596 xs `unionXml`
597 Seq.singleton (Tree0 $ (XML.NodeText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml`
598 ys
599 _ -> x <> y
600 (Seq.EmptyR, _) -> y
601 (_, Seq.EmptyL) -> x
602
603 unionsXml :: Foldable f => f XMLs -> XMLs
604 unionsXml = foldl' unionXml mempty