]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Fix XML rendition of PairFrenchquote.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
22 import Data.Set (Set)
23 import Data.TreeSeq.Strict (Tree(..), tree0)
24 import Data.Tuple (uncurry)
25 import Prelude (Num(..), undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30 import qualified Language.TCT.Write.Plain as Plain
31
32 -- import Language.TCT.Debug
33 import Language.TCT.Utils
34 import Language.TCT hiding (Parser)
35 import Language.XML
36 import Text.Blaze.XML ()
37
38 -- | Main entry point
39 --
40 -- NOTE: 'XmlNode' are still annotated with 'Cell',
41 -- but nothing is done to preserve any ordering amongst them,
42 -- because 'Node's sometimes need to be reordered
43 -- (eg. about/title may have a title from the section before,
44 -- hence outside of about).
45 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
46 writeXML :: Roots -> XMLs
47 writeXML doc =
48 -- (`S.evalState` def) $
49 case Seq.viewl doc of
50 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
51 let (titles, content) = partitionSection sec in
52 case Seq.viewl titles of
53 (unTree -> Cell st _) :< _ ->
54 xmlify def
55 { inh_titles = titles
56 , inh_figure = True
57 } contentWithAbout <>
58 xmlify def foot
59 where
60 contentWithAbout =
61 case Seq.findIndexL isAbout content of
62 Nothing -> Tree (Cell st $ NodeHeader $ HeaderColon "about" "") mempty <| content
63 Just{} -> content
64 isAbout = \case
65 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
66 _ -> False
67 _ -> xmlify def doc
68 _ -> xmlify def doc
69
70 partitionSection :: Root -> (Roots, Roots)
71 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
72 case Seq.viewl body of
73 EmptyL -> mempty
74 title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest ->
75 let (subtitles, content) = spanlSubtitles et rest in
76 (title <| (subtitles >>= subTrees), content)
77 where
78 spanlSubtitles ep ts =
79 case Seq.viewl ts of
80 sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
81 | lvlSub <= lvlPar
82 , pos_line span_begin - pos_line ep <= 1 ->
83 let (subs, ts') = spanlSubtitles span_end rs in
84 (sub <| subs, ts')
85 _ -> (mempty, ts)
86 _ -> (mempty, body)
87 partitionSection _ = mempty
88
89 -- * Type 'Inh'
90 data Inh
91 = Inh
92 { inh_figure :: Bool
93 , inh_para :: [Inh -> Root -> XML]
94 , inh_titles :: Roots
95 }
96 instance Default Inh where
97 def = Inh
98 { inh_figure = False
99 , inh_para = List.repeat elementPara
100 , inh_titles = mempty
101 }
102
103 -- ** 'inh_para'
104 elementPara :: Inh -> Root -> XML
105 elementPara inh (Tree c ts) = Tree (XmlElem "para" <$ c) $ xmlify inh ts
106
107 elementTitle :: Inh -> Root -> XML
108 elementTitle inh (Tree c ts) = Tree (XmlElem "title" <$ c) $ xmlify inh ts
109
110 elementName :: Inh -> Root -> XML
111 elementName inh (Tree c ts) = Tree (XmlElem "name" <$ c) $ xmlify inh ts
112
113 attributeName :: Inh -> Root -> XML
114 attributeName _inh (Tree c ts) = tree0 (XmlAttr "name" (Plain.writePlain ts) <$ c)
115
116 -- * Class 'Xmlify'
117 class Xmlify a where
118 xmlify :: Inh -> a -> XMLs
119 instance Xmlify Roots where
120 xmlify inh roots =
121 case Seq.viewl roots of
122 EmptyL -> mempty
123 r@(Tree cr@(Cell _sr nr) ts) :< rs ->
124 case nr of
125 ----------------------
126 -- NOTE: HeaderColon becomes parent
127 -- of any continuous following-sibling HeaderBar or HeaderGreat
128 NodeHeader (HeaderColon n _wh)
129 | (span, rest) <- spanlHeaderColon rs
130 , not $ null span ->
131 xmlify inh (Tree cr (ts<>span)) <>
132 xmlify inh rest
133 where
134 spanlHeaderColon :: Roots -> (Roots, Roots)
135 spanlHeaderColon =
136 Seq.spanl $ \case
137 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
138 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
139 _ -> False
140 ----------------------
141 -- NOTE: gather HeaderBrackets
142 NodeHeader HeaderBrackets{}
143 | (span,rest) <- spanlBrackets roots
144 , not (null span) ->
145 (<| xmlify inh rest) $
146 element "references" $
147 span >>= xmlify inh
148 where
149 spanlBrackets :: Roots -> (Roots, Roots)
150 spanlBrackets =
151 Seq.spanl $ \case
152 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
153 _ -> False
154 ----------------------
155 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
156 NodeText x
157 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
158 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
159 ----------------------
160 -- NOTE: detect [some text](http://some.url) or [SomeRef]
161 NodePair PairParen
162 | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
163 (<| xmlify inh rs') $
164 case bracket of
165 (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) ->
166 element "eref" $
167 xmlAttrs [Cell sl ("to",lnk)] <>
168 xmlify inh ts
169 _ ->
170 element "rref" $
171 xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <>
172 xmlify inh ts
173 ----------------------
174 -- NOTE: gather HeaderDash
175 _ | (span, rest) <- spanlItems (==HeaderDash) roots
176 , not $ null span ->
177 (<| xmlify inh rest) $
178 element "ul" $
179 span >>= xmlify inh{inh_para=List.repeat elementPara}
180 ----------------------
181 -- NOTE: gather HeaderDot
182 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
183 , not $ null span ->
184 (<| xmlify inh rest) $
185 element "ol" $
186 span >>= xmlify inh{inh_para=List.repeat elementPara}
187 where
188 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
189 spanlItems liHeader =
190 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
191 case nod of
192 NodeHeader (HeaderColon "li" _wh) -> True
193 NodeHeader hdr -> liHeader hdr
194 NodePair (PairElem "li" _as) -> True
195 _ -> False
196 ----------------------
197 NodePara | para:inh_para <- inh_para inh ->
198 para inh r <|
199 -- para (() <$ cr) (xmlify inh ts) <|
200 xmlify inh{inh_para} rs
201 ----------------------
202 -- NOTE: context-free Root
203 _ ->
204 xmlify inh r <>
205 xmlify inh rs
206 where
207 element :: XmlName -> XMLs -> XML
208 element n = Tree (XmlElem n <$ cr)
209 instance Xmlify Root where
210 xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) =
211 case nod of
212 ----------------------
213 NodePara ->
214 case inh_para inh of
215 [] -> xmlify inh ts
216 para:_ -> Seq.singleton $ para inh tn -- para (() <$ cn) $ xmlify inh ts
217 ----------------------
218 NodeHeader hdr ->
219 case hdr of
220 --
221 HeaderSection{} ->
222 Seq.singleton $
223 element "section" $ head <> xmlify inh' body
224 where
225 (titles, content) = partitionSection tn
226 (attrs, body) = partitionAttrs content
227 head =
228 case Seq.viewl titles of
229 EmptyL -> mempty
230 title@(unTree -> ct) :< subtitles ->
231 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
232 xmlify inh{inh_para=List.repeat elementTitle} title <>
233 aliases
234 where
235 aliases =
236 subtitles >>= \subtitle@(unTree -> cs) ->
237 return $
238 Tree (cs $> XmlElem "alias") $
239 xmlAttrs [cs $> ("id",getAttrId subtitle)]
240 inh' = inh
241 { inh_para = List.repeat elementPara
242 , inh_figure = True
243 }
244 --
245 HeaderColon n _wh ->
246 let (attrs,body) = partitionAttrs ts in
247 case n of
248 -- NOTE: insert titles into <about>.
249 "about" ->
250 Seq.singleton $
251 element "about" $
252 xmlify inh' (inh_titles inh) <>
253 xmlAttrs attrs <>
254 xmlify inh'{inh_figure=False} body
255 -- NOTE: in <figure> mode, unreserved elements become <figure>
256 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
257 Seq.singleton $
258 element "figure" $
259 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
260 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <>
261 case toList body of
262 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
263 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
264 -- NOTE: reserved elements
265 _ ->
266 Seq.singleton $
267 element (xmlLocalName n) $
268 xmlAttrs attrs <>
269 xmlify inh' body
270 where
271 inh' = inh
272 { inh_para =
273 case n of
274 "about" -> List.repeat elementTitle
275 "reference" -> elementTitle : List.repeat elementPara
276 "serie" -> List.repeat attributeName
277 "author" -> List.repeat attributeName
278 "editor" -> List.repeat attributeName
279 "org" -> List.repeat attributeName
280 "note" -> List.repeat elementPara
281 _ -> []
282 }
283 --
284 HeaderBar n wh ->
285 if inh_figure inh && n`List.notElem`elems || TL.null n
286 then
287 Seq.singleton $
288 element "artwork" $
289 xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
290 xmlify inh{inh_para=[]} ts
291 else
292 xmlify inh $
293 Tree (cell $ NodeHeader $ HeaderColon n wh) ts
294 --
295 HeaderGreat n _wh ->
296 Seq.singleton $
297 let (attrs,body) = partitionAttrs ts in
298 element "quote" $
299 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
300 xmlify inh{inh_para=List.repeat elementPara} body
301 --
302 HeaderEqual n _wh ->
303 Seq.singleton $
304 Tree0 $ cell $ XmlAttr (xmlLocalName n) $
305 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
306 --
307 HeaderDot n ->
308 Seq.singleton $
309 element "li" $
310 let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in
311 xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <>
312 xmlify inh ts
313 --
314 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
315 --
316 HeaderDashDash ->
317 Seq.singleton $ Tree0 $ cell $
318 XmlComment $ Plain.writePlain ts
319 --
320 HeaderBrackets ident ->
321 let (attrs,body) = partitionAttrs ts in
322 Seq.singleton $
323 element "reference" $
324 xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) <>
325 xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body
326 where
327 inh' = inh{inh_figure = False}
328 --
329 HeaderDotSlash _file -> xmlify inh ts
330 ----------------------
331 NodePair pair ->
332 case pair of
333 PairBracket | to <- Plain.writePlain ts
334 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
335 Seq.singleton $
336 element "rref" $
337 xmlAttrs [cell ("to",to)]
338 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
339 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
340 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
341 PairFrenchquote ->
342 Seq.singleton $
343 element "q" $
344 case ts of
345 (Seq.viewl -> Tree0 (Cell sl (NodeToken (TokenText l))) :< ls) ->
346 case Seq.viewr ls of
347 m :> Tree0 (Cell sr (NodeToken (TokenText r))) ->
348 xmlify inh $
349 Tree0 (Cell sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
350 Seq.<|(m Seq.|>Tree0 (Cell sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r)))))
351 _ ->
352 xmlify inh $
353 Tree0 (Cell sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
354 (Seq.viewr -> rs :> Tree0 (Cell sr (NodeToken (TokenText r)))) ->
355 xmlify inh $
356 rs Seq.|> Tree0 (Cell sr (NodeToken (TokenText (TL.dropAround Char.isSpace r))))
357 _ -> xmlify inh ts
358 PairHash ->
359 Seq.singleton $
360 element "ref" $
361 xmlAttrs [cell ("to",Plain.writePlain ts)]
362 PairElem name attrs ->
363 Seq.singleton $
364 element (xmlLocalName name) $
365 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
366 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
367 xmlify inh ts
368 _ ->
369 Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XmlText open) `unionXml`
370 xmlify inh ts `unionXml`
371 Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XmlText close)
372 where
373 (open, close) = pairBorders pair ts
374 bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)}
375 en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)}
376 ----------------------
377 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
378 ----------------------
379 NodeToken tok ->
380 case tok of
381 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
382 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
383 TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)]
384 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
385 ----------------------
386 NodeLower n as ->
387 Seq.singleton $
388 element "artwork" $
389 xmlify inh ts
390 where
391 cell :: a -> Cell a
392 cell = Cell ss
393 element :: XmlName -> XMLs -> XML
394 element n = Tree (cell $ XmlElem n)
395 instance Xmlify (Seq (Cell (XmlName,TL.Text))) where
396 xmlify _inh = xmlAttrs
397
398 -- * Elements
399
400 -- | Reserved elements' name
401 elems :: Set TL.Text
402 elems =
403 [ "about"
404 , "abstract"
405 , "address"
406 , "alias"
407 , "annotation"
408 , "area"
409 , "artwork"
410 , "aside"
411 , "audio"
412 , "author"
413 , "authors"
414 , "bcp14"
415 , "br"
416 , "call"
417 , "city"
418 , "code"
419 , "comment"
420 , "comments"
421 , "country"
422 , "date"
423 , "dd"
424 , "define"
425 , "del"
426 , "div"
427 , "dl"
428 , "document"
429 , "dt"
430 , "editor"
431 , "email"
432 , "embed"
433 , "eref"
434 , "fax"
435 , "feed"
436 , "feedback"
437 , "figure"
438 , "filter"
439 , "format"
440 , "from"
441 , "h"
442 , "hi"
443 , "html5"
444 , "i"
445 , "index"
446 , "iref"
447 , "keyword"
448 , "li"
449 , "link"
450 , "name"
451 , "note"
452 , "ol"
453 , "organization"
454 , "para"
455 , "postamble"
456 , "preamble"
457 , "q"
458 , "ref"
459 , "reference"
460 , "references"
461 , "region"
462 , "rref"
463 , "sc"
464 , "section"
465 , "serie"
466 , "source"
467 , "span"
468 , "street"
469 , "style"
470 , "sub"
471 , "sup"
472 , "table"
473 , "tbody"
474 , "td"
475 , "tel"
476 , "tfoot"
477 , "title"
478 , "th"
479 , "thead"
480 , "toc"
481 , "tof"
482 , "tr"
483 , "tt"
484 , "u"
485 , "ul"
486 , "uri"
487 , "version"
488 , "video"
489 , "workgroup"
490 , "xml"
491 , "zipcode"
492 ]
493
494 -- * Attributes
495
496 -- | Convenient alias, forcing the types
497 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
498 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
499
500 -- | Extract attributes
501 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
502 partitionAttrs ts = (attrs,cs)
503 where
504 (as,cs) = (`Seq.partition` ts) $ \case
505 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
506 _ -> False
507 attrs = attr <$> as
508 attr = \case
509 Tree (Cell ssn (NodeHeader (HeaderEqual n _wh))) a ->
510 Cell ssn (xmlLocalName n, v)
511 where
512 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
513 _ -> undefined
514
515 getAttrId :: Root -> TL.Text
516 getAttrId = Plain.writePlain . Seq.singleton
517
518 setAttr ::
519 Cell (XmlName, TL.Text) ->
520 Seq (Cell (XmlName, TL.Text)) ->
521 Seq (Cell (XmlName, TL.Text))
522 setAttr a@(unCell -> (k, _v)) as =
523 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
524 Just idx -> Seq.update idx a as
525 Nothing -> a <| as
526
527 defaultAttr ::
528 Seq (Cell (XmlName, TL.Text)) ->
529 Cell (XmlName, TL.Text) ->
530 Seq (Cell (XmlName, TL.Text))
531 defaultAttr as a@(unCell -> (k, _v)) =
532 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
533 Just _idx -> as
534 Nothing -> a <| as
535
536 -- * Text
537
538 -- | Unify two 'XMLs', merging border 'XmlText's if any.
539 unionXml :: XMLs -> XMLs -> XMLs
540 unionXml x y =
541 case (Seq.viewr x, Seq.viewl y) of
542 (xs :> x0, y0 :< ys) ->
543 case (x0,y0) of
544 ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XmlText tx))
545 , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XmlText ty)) ) | fx == fy ->
546 xs `unionXml`
547 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml`
548 ys
549 _ -> x <> y
550 (Seq.EmptyR, _) -> y
551 (_, Seq.EmptyL) -> x
552
553 unionsXml :: Foldable f => f XMLs -> XMLs
554 unionsXml = foldl' unionXml mempty