]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Write/XML.hs
Fix Index.
[doclang.git] / Hdoc / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Hdoc.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
22 import Data.Set (Set)
23 import Data.TreeSeq.Strict (Tree(..), tree0)
24 import Data.Tuple (uncurry)
25 import Prelude (Num(..), undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30 import qualified Hdoc.TCT.Write.Plain as Plain
31
32 -- import Hdoc.TCT.Debug
33 import Hdoc.TCT.Utils
34 import Hdoc.TCT as TCT hiding (Parser)
35 import Hdoc.XML (XML, XMLs)
36 import qualified Hdoc.XML as XML
37 import Text.Blaze.XML ()
38
39 -- | Main entry point
40 --
41 -- NOTE: 'XmlNode' are still annotated with 'Cell',
42 -- but nothing is done to preserve any ordering amongst them,
43 -- because 'Node's sometimes need to be reordered
44 -- (eg. about/title may have a title from the section before,
45 -- hence outside of about).
46 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
47 writeXML :: Roots -> XMLs
48 writeXML doc =
49 -- (`S.evalState` def) $
50 case Seq.viewl doc of
51 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
52 let (titles, content) = partitionSection sec in
53 case Seq.viewl titles of
54 (unTree -> Cell st _) :< _ ->
55 xmlify def
56 { inh_titles = titles
57 , inh_figure = True
58 } contentWithAbout <>
59 xmlify def foot
60 where
61 contentWithAbout =
62 case Seq.findIndexL isAbout content of
63 Nothing -> Tree (Cell st $ NodeHeader $ HeaderColon "about" "") mempty <| content
64 Just{} -> content
65 isAbout = \case
66 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
67 _ -> False
68 _ -> xmlify def doc
69 _ -> xmlify def doc
70
71 -- * Type 'Inh'
72 data Inh
73 = Inh
74 { inh_figure :: Bool
75 , inh_para :: [Inh -> Root -> XML]
76 , inh_titles :: Roots
77 }
78 instance Default Inh where
79 def = Inh
80 { inh_figure = False
81 , inh_para = List.repeat elementPara
82 , inh_titles = mempty
83 }
84
85 -- ** 'inh_para'
86 elementPara :: Inh -> Root -> XML
87 elementPara inh (Tree c ts) = Tree (XML.NodeElem "para" <$ c) $ xmlify inh ts
88
89 elementTitle :: Inh -> Root -> XML
90 elementTitle inh (Tree c ts) = Tree (XML.NodeElem "title" <$ c) $ xmlify inh ts
91
92 elementName :: Inh -> Root -> XML
93 elementName inh (Tree c ts) = Tree (XML.NodeElem "name" <$ c) $ xmlify inh ts
94
95 attributeName :: Inh -> Root -> XML
96 attributeName _inh (Tree c ts) = tree0 (XML.NodeAttr "name" (Plain.writePlain ts) <$ c)
97
98 attributeId :: Inh -> Root -> XML
99 attributeId _inh (Tree c ts) = tree0 (XML.NodeAttr "id" (Plain.writePlain ts) <$ c)
100
101 -- * Class 'Xmlify'
102 class Xmlify a where
103 xmlify :: Inh -> a -> XMLs
104 instance Xmlify Roots where
105 xmlify inh roots =
106 case Seq.viewl roots of
107 EmptyL -> mempty
108 r@(Tree cr@(Cell _sr nr) ts) :< rs ->
109 case nr of
110 ----------------------
111 -- NOTE: HeaderColon becomes parent
112 -- of any continuous following-sibling HeaderBar or HeaderGreat
113 NodeHeader (HeaderColon n _wh)
114 | (span, rest) <- spanlHeaderColon rs
115 , not $ null span ->
116 xmlify inh (Tree cr (ts<>span)) <>
117 xmlify inh rest
118 where
119 spanlHeaderColon :: Roots -> (Roots, Roots)
120 spanlHeaderColon =
121 Seq.spanl $ \case
122 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
123 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
124 _ -> False
125 ----------------------
126 -- NOTE: gather HeaderBrackets
127 NodeHeader HeaderBrackets{}
128 | (span,rest) <- spanlBrackets roots
129 , not (null span) ->
130 (<| xmlify inh rest) $
131 element "references" $
132 span >>= xmlify inh
133 where
134 spanlBrackets :: Roots -> (Roots, Roots)
135 spanlBrackets =
136 Seq.spanl $ \case
137 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
138 _ -> False
139 ----------------------
140 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
141 NodeText x
142 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
143 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
144 ----------------------
145 -- NOTE: detect (some text)[http://some.url] or (some text)[SomeRef]
146 NodePair PairParen
147 | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
148 (<| xmlify inh rs') $
149 case bracket of
150 (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) ->
151 element "eref" $
152 xmlAttrs [Cell sl ("to",lnk)] <>
153 xmlify inh ts
154 _ ->
155 element "rref" $
156 xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <>
157 if null ts -- NOTE: preserve empty parens
158 then Seq.singleton $ tree0 (XML.NodeText "" <$ cr)
159 else xmlify inh ts
160 ----------------------
161 -- NOTE: gather HeaderDash
162 _ | (span, rest) <- spanlItems (==HeaderDash) roots
163 , not $ null span ->
164 (<| xmlify inh rest) $
165 element "ul" $
166 span >>= xmlify inh{inh_para=List.repeat elementPara}
167 ----------------------
168 -- NOTE: gather HeaderDot
169 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
170 , not $ null span ->
171 (<| xmlify inh rest) $
172 element "ol" $
173 span >>= xmlify inh{inh_para=List.repeat elementPara}
174 where
175 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
176 spanlItems liHeader =
177 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
178 case nod of
179 NodeHeader (HeaderColon "li" _wh) -> True
180 NodeHeader hdr -> liHeader hdr
181 NodePair (PairElem "li" _as) -> True
182 _ -> False
183 ----------------------
184 NodePara | para:inh_para <- inh_para inh ->
185 para inh r <|
186 xmlify inh{inh_para} rs
187 ----------------------
188 -- NOTE: context-free Root
189 _ ->
190 xmlify inh r `unionXml`
191 xmlify inh rs
192 where
193 element :: XML.Name -> XMLs -> XML
194 element n = Tree (XML.NodeElem n <$ cr)
195 instance Xmlify Root where
196 xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) =
197 case nod of
198 ----------------------
199 NodePara ->
200 case inh_para inh of
201 [] -> xmlify inh ts
202 para:_ -> Seq.singleton $ para inh tn
203 ----------------------
204 NodeHeader hdr ->
205 case hdr of
206 --
207 HeaderSection{} ->
208 Seq.singleton $
209 element "section" $ head <> xmlify inh' body
210 where
211 (titles, content) = partitionSection tn
212 (attrs, body) = partitionAttrs content
213 head =
214 case Seq.viewl titles of
215 EmptyL -> mempty
216 title@(unTree -> ct) :< subtitles ->
217 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
218 xmlify inh{inh_para=List.repeat elementTitle} title <>
219 aliases
220 where
221 aliases =
222 subtitles >>= \subtitle@(unTree -> cs) ->
223 return $
224 Tree (cs $> XML.NodeElem "alias") $
225 xmlAttrs (Seq.singleton $ ct $> ("id",getAttrId subtitle)) <>
226 xmlify inh{inh_para=List.repeat elementTitle} subtitle
227 inh' = inh
228 { inh_para = List.repeat elementPara
229 , inh_figure = True
230 }
231 --
232 HeaderColon n _wh ->
233 let (attrs,body) = partitionAttrs ts in
234 case n of
235 -- NOTE: insert titles into <about>
236 "about" ->
237 Seq.singleton $
238 element "about" $
239 xmlify inh' (inh_titles inh) <>
240 xmlAttrs attrs <>
241 xmlify inh'{inh_figure=False} body
242 -- NOTE: handle judgment
243 _ | n`List.elem`elemsJudgment -> -- FIXME: not a special case so far.
244 Seq.singleton $
245 element (XML.localName n) $
246 xmlAttrs attrs <>
247 xmlify inh'' body
248 where
249 inh'' = inh'
250 { inh_para =
251 case n of
252 "grades" -> List.repeat attributeId
253 "judges" -> List.repeat attributeId
254 _ -> List.repeat elementTitle
255 }
256 -- NOTE: in <figure> mode, unreserved elements become <figure>
257 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
258 Seq.singleton $
259 element "figure" $
260 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
261 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <>
262 case toList body of
263 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
264 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
265 -- NOTE: reserved elements
266 _ ->
267 Seq.singleton $
268 element (XML.localName n) $
269 xmlAttrs attrs <>
270 xmlify inh' body
271 where
272 inh' = inh
273 { inh_para =
274 case n of
275 "about" -> List.repeat elementTitle
276 "reference" -> elementTitle : List.repeat elementPara
277 "serie" -> List.repeat attributeName
278 "author" -> List.repeat attributeName
279 "editor" -> List.repeat attributeName
280 "org" -> List.repeat attributeName
281 "note" -> List.repeat elementPara
282 _ -> []
283 }
284 --
285 HeaderBar n wh ->
286 if inh_figure inh && n`List.notElem`elems || TL.null n
287 then
288 Seq.singleton $
289 element "artwork" $
290 xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
291 xmlify inh{inh_para=[]} ts
292 else
293 xmlify inh $
294 Tree (cell $ NodeHeader $ HeaderColon n wh) ts
295 --
296 HeaderGreat n _wh ->
297 Seq.singleton $
298 let (attrs,body) = partitionAttrs ts in
299 element "quote" $
300 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
301 xmlify inh{inh_para=List.repeat elementPara} body
302 --
303 HeaderEqual n _wh ->
304 Seq.singleton $
305 Tree0 $ cell $ XML.NodeAttr (XML.localName n) $
306 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
307 --
308 HeaderDot n ->
309 Seq.singleton $
310 element "li" $
311 let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in
312 xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <>
313 xmlify inh ts
314 --
315 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
316 --
317 HeaderDashDash ->
318 Seq.singleton $ Tree0 $ cell $
319 XML.NodeComment $ Plain.writePlain ts
320 --
321 HeaderBrackets ident ->
322 let (attrs,body) = partitionAttrs ts in
323 Seq.singleton $
324 element "reference" $
325 xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) <>
326 xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body
327 where
328 inh' = inh{inh_figure = False}
329 --
330 HeaderDotSlash _file -> xmlify inh ts
331 ----------------------
332 NodePair pair ->
333 case pair of
334 PairBracket | to <- Plain.writePlain ts
335 , TL.all (\c -> c/='[' && c/=']' && Char.isPrint c && not (Char.isSpace c)) to ->
336 Seq.singleton $
337 element "rref" $
338 xmlAttrs [cell ("to",to)]
339 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
340 PairDash -> Seq.singleton $ element "del" $ xmlify inh ts
341 PairUnderscore -> Seq.singleton $ element "u" $ xmlify inh ts
342 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
343 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
344 PairFrenchquote ->
345 Seq.singleton $
346 element "q" $
347 case ts of
348 (Seq.viewl -> Tree0 (Cell sl (NodeToken (TokenText l))) :< ls) ->
349 case Seq.viewr ls of
350 m :> Tree0 (Cell sr (NodeToken (TokenText r))) ->
351 xmlify inh $
352 Tree0 (Cell sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
353 Seq.<|(m Seq.|>Tree0 (Cell sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r)))))
354 _ ->
355 xmlify inh $
356 Tree0 (Cell sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
357 (Seq.viewr -> rs :> Tree0 (Cell sr (NodeToken (TokenText r)))) ->
358 xmlify inh $
359 rs Seq.|> Tree0 (Cell sr (NodeToken (TokenText (TL.dropAround Char.isSpace r))))
360 _ -> xmlify inh ts
361 PairHash ->
362 Seq.singleton $
363 element "tag" $
364 xmlify inh{inh_para=[]} ts
365 -- xmlAttrs [cell ("to",Plain.writePlain ts)]
366 PairElem name attrs ->
367 Seq.singleton $
368 element (XML.localName name) $
369 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
370 cell (XML.localName elemAttr_name,elemAttr_value)) <$> attrs) <>
371 xmlify inh ts
372 _ ->
373 Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XML.NodeText open) `unionXml`
374 xmlify inh ts `unionXml`
375 Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XML.NodeText close)
376 where
377 (open, close) = pairBorders pair ts
378 bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)}
379 en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)}
380 ----------------------
381 NodeText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t
382 ----------------------
383 NodeToken tok ->
384 case tok of
385 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XML.NodeText $ TL.singleton c
386 TokenText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t
387 TokenTag t -> Seq.singleton $ element "tag" $ Seq.singleton $ Tree0 $ cell $ XML.NodeText t
388 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
389 ----------------------
390 NodeLower n as ->
391 Seq.singleton $
392 element "artwork" $
393 xmlify inh ts
394 where
395 cell :: a -> Cell a
396 cell = Cell ss
397 element :: XML.Name -> XMLs -> XML
398 element n = Tree (cell $ XML.NodeElem n)
399 instance Xmlify (Seq (Cell (XML.Name,TL.Text))) where
400 xmlify _inh = xmlAttrs
401
402 -- * Elements
403
404 -- | Reserved elements' name
405 elems :: Set TL.Text
406 elems =
407 [ "about"
408 , "abstract"
409 , "address"
410 , "alias"
411 , "annotation"
412 , "area"
413 , "artwork"
414 , "aside"
415 , "audio"
416 , "author"
417 , "authors"
418 , "bcp14"
419 , "br"
420 , "break"
421 , "call"
422 , "city"
423 , "code"
424 , "comment"
425 , "comments"
426 , "country"
427 , "date"
428 , "dd"
429 , "default"
430 , "define"
431 , "del"
432 , "div"
433 , "dl"
434 , "document"
435 , "dt"
436 , "editor"
437 , "email"
438 , "embed"
439 , "eref"
440 , "fax"
441 , "feed"
442 , "feedback"
443 , "figure"
444 , "filter"
445 , "format"
446 , "from"
447 , "h"
448 , "hi"
449 , "html5"
450 , "i"
451 , "index"
452 , "iref"
453 , "keyword"
454 , "li"
455 , "link"
456 , "name"
457 , "note"
458 , "ol"
459 , "organization"
460 , "para"
461 , "postamble"
462 , "preamble"
463 , "q"
464 , "reference"
465 , "references"
466 , "region"
467 , "rref"
468 , "sc"
469 , "section"
470 , "serie"
471 , "source"
472 , "span"
473 , "street"
474 , "style"
475 , "sub"
476 , "sup"
477 , "table"
478 , "tag"
479 , "tbody"
480 , "td"
481 , "tel"
482 , "tfoot"
483 , "th"
484 , "thead"
485 , "title"
486 , "toc"
487 , "tof"
488 , "tr"
489 , "tt"
490 , "u"
491 , "ul"
492 , "uri"
493 , "version"
494 , "video"
495 , "workgroup"
496 , "xml"
497 , "zipcode"
498 ]
499
500 elemsJudgment :: Set TL.Text
501 elemsJudgment =
502 [ "choice"
503 , "grade"
504 , "grades"
505 , "judge"
506 , "judges"
507 , "judgment"
508 , "opinion"
509 ]
510
511 -- * Attributes
512
513 -- | Convenient alias, forcing the types
514 xmlAttrs :: Seq (Cell (XML.Name,TL.Text)) -> XMLs
515 xmlAttrs = (Tree0 . (uncurry XML.NodeAttr <$>) <$>)
516
517 -- | Extract section titles
518 partitionSection :: Root -> (Roots, Roots)
519 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
520 case Seq.viewl body of
521 EmptyL -> mempty
522 title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest ->
523 let (subtitles, content) = spanlSubtitles et rest in
524 (title <| (subtitles >>= subTrees), content)
525 where
526 spanlSubtitles ep ts =
527 case Seq.viewl ts of
528 sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
529 | lvlSub <= lvlPar
530 , pos_line span_begin - pos_line ep <= 1 ->
531 let (subs, ts') = spanlSubtitles span_end rs in
532 (sub <| subs, ts')
533 _ -> (mempty, ts)
534 _ -> (mempty, body)
535 partitionSection _ = mempty
536
537 -- | Extract attributes
538 partitionAttrs :: Roots -> (Seq (Cell (XML.Name, TL.Text)), Roots)
539 partitionAttrs ts = (attrs,cs)
540 where
541 (as,cs) = (`Seq.partition` ts) $ \case
542 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
543 _ -> False
544 attrs = attr <$> as
545 attr = \case
546 Tree (Cell loc (NodeHeader (HeaderEqual n _wh))) a ->
547 Cell loc (XML.localName n, v)
548 where
549 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
550 _ -> undefined
551
552 getAttrId :: Root -> TL.Text
553 getAttrId = Plain.writePlain . Seq.singleton
554
555 setAttr ::
556 Cell (XML.Name, TL.Text) ->
557 Seq (Cell (XML.Name, TL.Text)) ->
558 Seq (Cell (XML.Name, TL.Text))
559 setAttr a@(unCell -> (k, _v)) as =
560 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
561 Just idx -> Seq.update idx a as
562 Nothing -> a <| as
563
564 defaultAttr ::
565 Seq (Cell (XML.Name, TL.Text)) ->
566 Cell (XML.Name, TL.Text) ->
567 Seq (Cell (XML.Name, TL.Text))
568 defaultAttr as a@(unCell -> (k, _v)) =
569 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
570 Just _idx -> as
571 Nothing -> a <| as
572
573 -- * Text
574
575 -- | Unify two 'XMLs', merging border 'XML.NodeText's if any.
576 unionXml :: XMLs -> XMLs -> XMLs
577 unionXml x y =
578 case (Seq.viewr x, Seq.viewl y) of
579 (xs :> x0, y0 :< ys) ->
580 case (x0,y0) of
581 ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XML.NodeText tx))
582 , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XML.NodeText ty)) ) | fx == fy ->
583 xs `unionXml`
584 Seq.singleton (Tree0 $ (XML.NodeText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml`
585 ys
586 _ -> x <> y
587 (Seq.EmptyR, _) -> y
588 (_, Seq.EmptyL) -> x
589
590 unionsXml :: Foldable f => f XMLs -> XMLs
591 unionsXml = foldl' unionXml mempty