]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Write/XML.hs
Use RWS instead of State.
[doclang.git] / Hdoc / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Hdoc.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
22 import Data.Set (Set)
23 import Data.TreeSeq.Strict (Tree(..), tree0)
24 import Data.Tuple (uncurry)
25 import Prelude (Num(..), undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30 import qualified Hdoc.TCT.Write.Plain as Plain
31
32 -- import Hdoc.TCT.Debug
33 import Hdoc.TCT.Utils
34 import Hdoc.TCT as TCT hiding (Parser)
35 import Hdoc.XML (XML, XMLs)
36 import qualified Hdoc.XML as XML
37 import Text.Blaze.XML ()
38
39 -- | Main entry point
40 --
41 -- NOTE: 'XmlNode' are still annotated with 'Cell',
42 -- but nothing is done to preserve any ordering amongst them,
43 -- because 'Node's sometimes need to be reordered
44 -- (eg. about/title may have a title from the section before,
45 -- hence outside of about).
46 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
47 writeXML :: Roots -> XMLs
48 writeXML doc =
49 -- (`S.evalState` def) $
50 case Seq.viewl doc of
51 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
52 let (titles, content) = partitionSection sec in
53 case Seq.viewl titles of
54 (unTree -> Cell st _) :< _ ->
55 xmlify def
56 { inh_titles = titles
57 , inh_figure = True
58 } contentWithAbout <>
59 xmlify def foot
60 where
61 contentWithAbout =
62 case Seq.findIndexL isAbout content of
63 Nothing -> Tree (Cell st $ NodeHeader $ HeaderColon "about" "") mempty <| content
64 Just{} -> content
65 isAbout = \case
66 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
67 _ -> False
68 _ -> xmlify def doc
69 _ -> xmlify def doc
70
71 -- * Type 'Inh'
72 data Inh
73 = Inh
74 { inh_figure :: Bool
75 , inh_para :: [Inh -> Root -> XML]
76 , inh_titles :: Roots
77 }
78 instance Default Inh where
79 def = Inh
80 { inh_figure = False
81 , inh_para = List.repeat elementPara
82 , inh_titles = mempty
83 }
84
85 -- ** 'inh_para'
86 elementPara :: Inh -> Root -> XML
87 elementPara inh (Tree c ts) = Tree (XML.NodeElem "para" <$ c) $ xmlify inh ts
88
89 elementTitle :: Inh -> Root -> XML
90 elementTitle inh (Tree c ts) = Tree (XML.NodeElem "title" <$ c) $ xmlify inh ts
91
92 elementName :: Inh -> Root -> XML
93 elementName inh (Tree c ts) = Tree (XML.NodeElem "name" <$ c) $ xmlify inh ts
94
95 attributeName :: Inh -> Root -> XML
96 attributeName _inh (Tree c ts) = tree0 (XML.NodeAttr "name" (Plain.writePlain ts) <$ c)
97
98 attributeId :: Inh -> Root -> XML
99 attributeId _inh (Tree c ts) = tree0 (XML.NodeAttr "id" (Plain.writePlain ts) <$ c)
100
101 -- * Class 'Xmlify'
102 class Xmlify a where
103 xmlify :: Inh -> a -> XMLs
104 instance Xmlify Roots where
105 xmlify inh roots =
106 case Seq.viewl roots of
107 EmptyL -> mempty
108 r@(Tree cr@(Cell _sr nr) ts) :< rs ->
109 case nr of
110 ----------------------
111 -- NOTE: HeaderColon becomes parent
112 -- of any continuous following-sibling HeaderBar or HeaderGreat
113 NodeHeader (HeaderColon n _wh)
114 | (span, rest) <- spanlHeaderColon rs
115 , not $ null span ->
116 xmlify inh (Tree cr (ts<>span)) <>
117 xmlify inh rest
118 where
119 spanlHeaderColon :: Roots -> (Roots, Roots)
120 spanlHeaderColon =
121 Seq.spanl $ \case
122 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
123 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
124 _ -> False
125 ----------------------
126 -- NOTE: gather HeaderBrackets
127 NodeHeader HeaderBrackets{}
128 | (span,rest) <- spanlBrackets roots
129 , not (null span) ->
130 (<| xmlify inh rest) $
131 element "references" $
132 span >>= xmlify inh
133 where
134 spanlBrackets :: Roots -> (Roots, Roots)
135 spanlBrackets =
136 Seq.spanl $ \case
137 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
138 _ -> False
139 ----------------------
140 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
141 NodeText x
142 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
143 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
144 ----------------------
145 -- NOTE: detect (some text)[http://some.url] or (some text)[SomeRef]
146 NodePair PairParen
147 | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
148 (<| xmlify inh rs') $
149 case bracket of
150 (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) ->
151 element "eref" $
152 xmlAttrs [Cell sl ("to",lnk)] <>
153 xmlify inh ts
154 _ ->
155 element "rref" $
156 xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <>
157 if null ts -- NOTE: preserve empty parens
158 then Seq.singleton $ tree0 (XML.NodeText "" <$ cr)
159 else xmlify inh ts
160 ----------------------
161 -- NOTE: gather HeaderDash
162 _ | (span, rest) <- spanlItems (==HeaderDash) roots
163 , not $ null span ->
164 (<| xmlify inh rest) $
165 element "ul" $
166 span >>= xmlify inh{inh_para=List.repeat elementPara}
167 ----------------------
168 -- NOTE: gather HeaderDot
169 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
170 , not $ null span ->
171 (<| xmlify inh rest) $
172 element "ol" $
173 span >>= xmlify inh{inh_para=List.repeat elementPara}
174 where
175 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
176 spanlItems liHeader =
177 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
178 case nod of
179 NodeHeader (HeaderColon "li" _wh) -> True
180 NodeHeader hdr -> liHeader hdr
181 NodePair (PairElem "li" _as) -> True
182 _ -> False
183 ----------------------
184 NodePara | para:inh_para <- inh_para inh ->
185 para inh r <|
186 xmlify inh{inh_para} rs
187 ----------------------
188 -- NOTE: context-free Root
189 _ ->
190 xmlify inh r `unionXml`
191 xmlify inh rs
192 where
193 element :: XML.Name -> XMLs -> XML
194 element n = Tree (XML.NodeElem n <$ cr)
195 instance Xmlify Root where
196 xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) =
197 case nod of
198 ----------------------
199 NodePara ->
200 case inh_para inh of
201 [] -> xmlify inh ts
202 para:_ -> Seq.singleton $ para inh tn
203 ----------------------
204 NodeHeader hdr ->
205 case hdr of
206 --
207 HeaderSection{} ->
208 Seq.singleton $
209 element "section" $ head <> xmlify inh' body
210 where
211 (titles, content) = partitionSection tn
212 (attrs, body) = partitionAttrs content
213 head =
214 case Seq.viewl titles of
215 EmptyL -> mempty
216 title@(unTree -> ct) :< subtitles ->
217 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
218 xmlify inh{inh_para=List.repeat elementTitle} title <>
219 aliases
220 where
221 aliases =
222 subtitles >>= \subtitle@(unTree -> cs) ->
223 return $
224 Tree (cs $> XML.NodeElem "alias") $
225 xmlify inh{inh_para=List.repeat elementTitle} subtitle
226 inh' = inh
227 { inh_para = List.repeat elementPara
228 , inh_figure = True
229 }
230 --
231 HeaderColon n _wh ->
232 let (attrs,body) = partitionAttrs ts in
233 case n of
234 -- NOTE: insert titles into <about>
235 "about" ->
236 Seq.singleton $
237 element "about" $
238 xmlify inh' (inh_titles inh) <>
239 xmlAttrs attrs <>
240 xmlify inh'{inh_figure=False} body
241 -- NOTE: handle judgment
242 _ | n`List.elem`elemsJudgment -> -- FIXME: not a special case so far.
243 Seq.singleton $
244 element (XML.localName n) $
245 xmlAttrs attrs <>
246 xmlify inh'' body
247 where
248 inh'' = inh'
249 { inh_para =
250 case n of
251 "grades" -> List.repeat attributeId
252 "judges" -> List.repeat attributeId
253 _ -> List.repeat elementTitle
254 }
255 -- NOTE: in <figure> mode, unreserved elements become <figure>
256 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
257 Seq.singleton $
258 element "figure" $
259 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
260 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <>
261 case toList body of
262 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
263 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
264 -- NOTE: reserved elements
265 _ ->
266 Seq.singleton $
267 element (XML.localName n) $
268 xmlAttrs attrs <>
269 xmlify inh' body
270 where
271 inh' = inh
272 { inh_para =
273 case n of
274 "about" -> List.repeat elementTitle
275 "reference" -> elementTitle : List.repeat elementPara
276 "serie" -> List.repeat attributeName
277 "author" -> List.repeat attributeName
278 "editor" -> List.repeat attributeName
279 "org" -> List.repeat attributeName
280 "note" -> List.repeat elementPara
281 _ -> []
282 }
283 --
284 HeaderBar n wh ->
285 if inh_figure inh && n`List.notElem`elems || TL.null n
286 then
287 Seq.singleton $
288 element "artwork" $
289 xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
290 xmlify inh{inh_para=[]} ts
291 else
292 xmlify inh $
293 Tree (cell $ NodeHeader $ HeaderColon n wh) ts
294 --
295 HeaderGreat n _wh ->
296 Seq.singleton $
297 let (attrs,body) = partitionAttrs ts in
298 element "quote" $
299 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
300 xmlify inh{inh_para=List.repeat elementPara} body
301 --
302 HeaderEqual n _wh ->
303 Seq.singleton $
304 Tree0 $ cell $ XML.NodeAttr (XML.localName n) $
305 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
306 --
307 HeaderDot n ->
308 Seq.singleton $
309 element "li" $
310 let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in
311 xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <>
312 xmlify inh ts
313 --
314 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
315 --
316 HeaderDashDash ->
317 Seq.singleton $ Tree0 $ cell $
318 XML.NodeComment $ Plain.writePlain ts
319 --
320 HeaderBrackets ident ->
321 let (attrs,body) = partitionAttrs ts in
322 Seq.singleton $
323 element "reference" $
324 xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) <>
325 xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body
326 where
327 inh' = inh{inh_figure = False}
328 --
329 HeaderDotSlash _file -> xmlify inh ts
330 ----------------------
331 NodePair pair ->
332 case pair of
333 PairBracket | to <- Plain.writePlain ts
334 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
335 Seq.singleton $
336 element "rref" $
337 xmlAttrs [cell ("to",to)]
338 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
339 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
340 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
341 PairFrenchquote ->
342 Seq.singleton $
343 element "q" $
344 case ts of
345 (Seq.viewl -> Tree0 (Cell sl (NodeToken (TokenText l))) :< ls) ->
346 case Seq.viewr ls of
347 m :> Tree0 (Cell sr (NodeToken (TokenText r))) ->
348 xmlify inh $
349 Tree0 (Cell sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
350 Seq.<|(m Seq.|>Tree0 (Cell sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r)))))
351 _ ->
352 xmlify inh $
353 Tree0 (Cell sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
354 (Seq.viewr -> rs :> Tree0 (Cell sr (NodeToken (TokenText r)))) ->
355 xmlify inh $
356 rs Seq.|> Tree0 (Cell sr (NodeToken (TokenText (TL.dropAround Char.isSpace r))))
357 _ -> xmlify inh ts
358 PairHash ->
359 Seq.singleton $
360 element "tag" $
361 xmlify inh{inh_para=[]} ts
362 -- xmlAttrs [cell ("to",Plain.writePlain ts)]
363 PairElem name attrs ->
364 Seq.singleton $
365 element (XML.localName name) $
366 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
367 cell (XML.localName elemAttr_name,elemAttr_value)) <$> attrs) <>
368 xmlify inh ts
369 _ ->
370 Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XML.NodeText open) `unionXml`
371 xmlify inh ts `unionXml`
372 Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XML.NodeText close)
373 where
374 (open, close) = pairBorders pair ts
375 bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)}
376 en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)}
377 ----------------------
378 NodeText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t
379 ----------------------
380 NodeToken tok ->
381 case tok of
382 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XML.NodeText $ TL.singleton c
383 TokenText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t
384 TokenTag t -> Seq.singleton $ element "tag" $ Seq.singleton $ Tree0 $ cell $ XML.NodeText t
385 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
386 ----------------------
387 NodeLower n as ->
388 Seq.singleton $
389 element "artwork" $
390 xmlify inh ts
391 where
392 cell :: a -> Cell a
393 cell = Cell ss
394 element :: XML.Name -> XMLs -> XML
395 element n = Tree (cell $ XML.NodeElem n)
396 instance Xmlify (Seq (Cell (XML.Name,TL.Text))) where
397 xmlify _inh = xmlAttrs
398
399 -- * Elements
400
401 -- | Reserved elements' name
402 elems :: Set TL.Text
403 elems =
404 [ "about"
405 , "abstract"
406 , "address"
407 , "alias"
408 , "annotation"
409 , "area"
410 , "artwork"
411 , "aside"
412 , "audio"
413 , "author"
414 , "authors"
415 , "bcp14"
416 , "br"
417 , "break"
418 , "call"
419 , "city"
420 , "code"
421 , "comment"
422 , "comments"
423 , "country"
424 , "date"
425 , "dd"
426 , "default"
427 , "define"
428 , "del"
429 , "div"
430 , "dl"
431 , "document"
432 , "dt"
433 , "editor"
434 , "email"
435 , "embed"
436 , "eref"
437 , "fax"
438 , "feed"
439 , "feedback"
440 , "figure"
441 , "filter"
442 , "format"
443 , "from"
444 , "h"
445 , "hi"
446 , "html5"
447 , "i"
448 , "index"
449 , "iref"
450 , "keyword"
451 , "li"
452 , "link"
453 , "name"
454 , "note"
455 , "ol"
456 , "organization"
457 , "para"
458 , "postamble"
459 , "preamble"
460 , "q"
461 , "reference"
462 , "references"
463 , "region"
464 , "rref"
465 , "sc"
466 , "section"
467 , "serie"
468 , "source"
469 , "span"
470 , "street"
471 , "style"
472 , "sub"
473 , "sup"
474 , "table"
475 , "tag"
476 , "tbody"
477 , "td"
478 , "tel"
479 , "tfoot"
480 , "th"
481 , "thead"
482 , "title"
483 , "toc"
484 , "tof"
485 , "tr"
486 , "tt"
487 , "u"
488 , "ul"
489 , "uri"
490 , "version"
491 , "video"
492 , "workgroup"
493 , "xml"
494 , "zipcode"
495 ]
496
497 elemsJudgment :: Set TL.Text
498 elemsJudgment =
499 [ "choice"
500 , "grade"
501 , "grades"
502 , "judge"
503 , "judges"
504 , "judgment"
505 , "opinion"
506 ]
507
508 -- * Attributes
509
510 -- | Convenient alias, forcing the types
511 xmlAttrs :: Seq (Cell (XML.Name,TL.Text)) -> XMLs
512 xmlAttrs = (Tree0 . (uncurry XML.NodeAttr <$>) <$>)
513
514 -- | Extract section titles
515 partitionSection :: Root -> (Roots, Roots)
516 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
517 case Seq.viewl body of
518 EmptyL -> mempty
519 title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest ->
520 let (subtitles, content) = spanlSubtitles et rest in
521 (title <| (subtitles >>= subTrees), content)
522 where
523 spanlSubtitles ep ts =
524 case Seq.viewl ts of
525 sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
526 | lvlSub <= lvlPar
527 , pos_line span_begin - pos_line ep <= 1 ->
528 let (subs, ts') = spanlSubtitles span_end rs in
529 (sub <| subs, ts')
530 _ -> (mempty, ts)
531 _ -> (mempty, body)
532 partitionSection _ = mempty
533
534 -- | Extract attributes
535 partitionAttrs :: Roots -> (Seq (Cell (XML.Name, TL.Text)), Roots)
536 partitionAttrs ts = (attrs,cs)
537 where
538 (as,cs) = (`Seq.partition` ts) $ \case
539 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
540 _ -> False
541 attrs = attr <$> as
542 attr = \case
543 Tree (Cell loc (NodeHeader (HeaderEqual n _wh))) a ->
544 Cell loc (XML.localName n, v)
545 where
546 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
547 _ -> undefined
548
549 getAttrId :: Root -> TL.Text
550 getAttrId = Plain.writePlain . Seq.singleton
551
552 setAttr ::
553 Cell (XML.Name, TL.Text) ->
554 Seq (Cell (XML.Name, TL.Text)) ->
555 Seq (Cell (XML.Name, TL.Text))
556 setAttr a@(unCell -> (k, _v)) as =
557 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
558 Just idx -> Seq.update idx a as
559 Nothing -> a <| as
560
561 defaultAttr ::
562 Seq (Cell (XML.Name, TL.Text)) ->
563 Cell (XML.Name, TL.Text) ->
564 Seq (Cell (XML.Name, TL.Text))
565 defaultAttr as a@(unCell -> (k, _v)) =
566 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
567 Just _idx -> as
568 Nothing -> a <| as
569
570 -- * Text
571
572 -- | Unify two 'XMLs', merging border 'XML.NodeText's if any.
573 unionXml :: XMLs -> XMLs -> XMLs
574 unionXml x y =
575 case (Seq.viewr x, Seq.viewl y) of
576 (xs :> x0, y0 :< ys) ->
577 case (x0,y0) of
578 ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XML.NodeText tx))
579 , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XML.NodeText ty)) ) | fx == fy ->
580 xs `unionXml`
581 Seq.singleton (Tree0 $ (XML.NodeText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml`
582 ys
583 _ -> x <> y
584 (Seq.EmptyR, _) -> y
585 (_, Seq.EmptyL) -> x
586
587 unionsXml :: Foldable f => f XMLs -> XMLs
588 unionsXml = foldl' unionXml mempty