]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Fix XML merging.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
22 import Data.Set (Set)
23 import Data.TreeSeq.Strict (Tree(..), tree0)
24 import Data.Tuple (uncurry)
25 import Prelude (Num(..), undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30 import qualified Language.TCT.Write.Plain as Plain
31
32 -- import Language.TCT.Debug
33 import Language.TCT.Utils
34 import Language.TCT hiding (Parser)
35 import Language.XML
36 import Text.Blaze.XML ()
37
38 -- | Main entry point
39 --
40 -- NOTE: 'XmlNode' are still annotated with 'Cell',
41 -- but nothing is done to preserve any ordering amongst them,
42 -- because 'Node's sometimes need to be reordered
43 -- (eg. about/title may have a title from the section before,
44 -- hence outside of about).
45 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
46 writeXML :: Roots -> XMLs
47 writeXML doc =
48 -- (`S.evalState` def) $
49 case Seq.viewl doc of
50 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
51 let (titles, content) = partitionSection sec in
52 case Seq.viewl titles of
53 (unTree -> Cell st _) :< _ ->
54 xmlify def
55 { inh_titles = titles
56 , inh_figure = True
57 } contentWithAbout <>
58 xmlify def foot
59 where
60 contentWithAbout =
61 case Seq.findIndexL isAbout content of
62 Nothing -> Tree (Cell st $ NodeHeader $ HeaderColon "about" "") mempty <| content
63 Just{} -> content
64 isAbout = \case
65 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
66 _ -> False
67 _ -> xmlify def doc
68 _ -> xmlify def doc
69
70 partitionSection :: Root -> (Roots, Roots)
71 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
72 case Seq.viewl body of
73 EmptyL -> mempty
74 title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest ->
75 let (subtitles, content) = spanlSubtitles et rest in
76 (title <| (subtitles >>= subTrees), content)
77 where
78 spanlSubtitles ep ts =
79 case Seq.viewl ts of
80 sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
81 | lvlSub <= lvlPar
82 , pos_line span_begin - pos_line ep <= 1 ->
83 let (subs, ts') = spanlSubtitles span_end rs in
84 (sub <| subs, ts')
85 _ -> (mempty, ts)
86 _ -> (mempty, body)
87 partitionSection _ = mempty
88
89 -- * Type 'Inh'
90 data Inh
91 = Inh
92 { inh_figure :: Bool
93 , inh_para :: [Inh -> Root -> XML]
94 , inh_titles :: Roots
95 }
96 instance Default Inh where
97 def = Inh
98 { inh_figure = False
99 , inh_para = List.repeat elementPara
100 , inh_titles = mempty
101 }
102
103 -- ** 'inh_para'
104 elementPara :: Inh -> Root -> XML
105 elementPara inh (Tree c ts) = Tree (XmlElem "para" <$ c) $ xmlify inh ts
106
107 elementTitle :: Inh -> Root -> XML
108 elementTitle inh (Tree c ts) = Tree (XmlElem "title" <$ c) $ xmlify inh ts
109
110 elementName :: Inh -> Root -> XML
111 elementName inh (Tree c ts) = Tree (XmlElem "name" <$ c) $ xmlify inh ts
112
113 attributeName :: Inh -> Root -> XML
114 attributeName _inh (Tree c ts) = tree0 (XmlAttr "name" (Plain.writePlain ts) <$ c)
115
116 -- * Class 'Xmlify'
117 class Xmlify a where
118 xmlify :: Inh -> a -> XMLs
119 instance Xmlify Roots where
120 xmlify inh roots =
121 case Seq.viewl roots of
122 EmptyL -> mempty
123 r@(Tree cr@(Cell _sr nr) ts) :< rs ->
124 case nr of
125 ----------------------
126 -- NOTE: HeaderColon becomes parent
127 -- of any continuous following-sibling HeaderBar or HeaderGreat
128 NodeHeader (HeaderColon n _wh)
129 | (span, rest) <- spanlHeaderColon rs
130 , not $ null span ->
131 xmlify inh (Tree cr (ts<>span)) <>
132 xmlify inh rest
133 where
134 spanlHeaderColon :: Roots -> (Roots, Roots)
135 spanlHeaderColon =
136 Seq.spanl $ \case
137 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
138 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
139 _ -> False
140 ----------------------
141 -- NOTE: gather HeaderBrackets
142 NodeHeader HeaderBrackets{}
143 | (span,rest) <- spanlBrackets roots
144 , not (null span) ->
145 (<| xmlify inh rest) $
146 element "references" $
147 span >>= xmlify inh
148 where
149 spanlBrackets :: Roots -> (Roots, Roots)
150 spanlBrackets =
151 Seq.spanl $ \case
152 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
153 _ -> False
154 ----------------------
155 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
156 NodeText x
157 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
158 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
159 ----------------------
160 -- NOTE: detect [some text](http://some.url) or [SomeRef]
161 NodePair PairParen
162 | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
163 (<| xmlify inh rs') $
164 case bracket of
165 (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) ->
166 element "eref" $
167 xmlAttrs [Cell sl ("to",lnk)] <>
168 xmlify inh ts
169 _ ->
170 element "rref" $
171 xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <>
172 xmlify inh ts
173 ----------------------
174 -- NOTE: gather HeaderDash
175 _ | (span, rest) <- spanlItems (==HeaderDash) roots
176 , not $ null span ->
177 (<| xmlify inh rest) $
178 element "ul" $
179 span >>= xmlify inh{inh_para=List.repeat elementPara}
180 ----------------------
181 -- NOTE: gather HeaderDot
182 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
183 , not $ null span ->
184 (<| xmlify inh rest) $
185 element "ol" $
186 span >>= xmlify inh{inh_para=List.repeat elementPara}
187 where
188 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
189 spanlItems liHeader =
190 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
191 case nod of
192 NodeHeader (HeaderColon "li" _wh) -> True
193 NodeHeader hdr -> liHeader hdr
194 NodePair (PairElem "li" _as) -> True
195 _ -> False
196 ----------------------
197 NodePara | para:inh_para <- inh_para inh ->
198 para inh r <|
199 xmlify inh{inh_para} rs
200 ----------------------
201 -- NOTE: context-free Root
202 _ ->
203 xmlify inh r `unionXml`
204 xmlify inh rs
205 where
206 element :: XmlName -> XMLs -> XML
207 element n = Tree (XmlElem n <$ cr)
208 instance Xmlify Root where
209 xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) =
210 case nod of
211 ----------------------
212 NodePara ->
213 case inh_para inh of
214 [] -> xmlify inh ts
215 para:_ -> Seq.singleton $ para inh tn
216 ----------------------
217 NodeHeader hdr ->
218 case hdr of
219 --
220 HeaderSection{} ->
221 Seq.singleton $
222 element "section" $ head <> xmlify inh' body
223 where
224 (titles, content) = partitionSection tn
225 (attrs, body) = partitionAttrs content
226 head =
227 case Seq.viewl titles of
228 EmptyL -> mempty
229 title@(unTree -> ct) :< subtitles ->
230 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
231 xmlify inh{inh_para=List.repeat elementTitle} title <>
232 aliases
233 where
234 aliases =
235 subtitles >>= \subtitle@(unTree -> cs) ->
236 return $
237 Tree (cs $> XmlElem "alias") $
238 xmlAttrs [cs $> ("id",getAttrId subtitle)]
239 inh' = inh
240 { inh_para = List.repeat elementPara
241 , inh_figure = True
242 }
243 --
244 HeaderColon n _wh ->
245 let (attrs,body) = partitionAttrs ts in
246 case n of
247 -- NOTE: insert titles into <about>.
248 "about" ->
249 Seq.singleton $
250 element "about" $
251 xmlify inh' (inh_titles inh) <>
252 xmlAttrs attrs <>
253 xmlify inh'{inh_figure=False} body
254 -- NOTE: in <figure> mode, unreserved elements become <figure>
255 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
256 Seq.singleton $
257 element "figure" $
258 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
259 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <>
260 case toList body of
261 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
262 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
263 -- NOTE: reserved elements
264 _ ->
265 Seq.singleton $
266 element (xmlLocalName n) $
267 xmlAttrs attrs <>
268 xmlify inh' body
269 where
270 inh' = inh
271 { inh_para =
272 case n of
273 "about" -> List.repeat elementTitle
274 "reference" -> elementTitle : List.repeat elementPara
275 "serie" -> List.repeat attributeName
276 "author" -> List.repeat attributeName
277 "editor" -> List.repeat attributeName
278 "org" -> List.repeat attributeName
279 "note" -> List.repeat elementPara
280 _ -> []
281 }
282 --
283 HeaderBar n wh ->
284 if inh_figure inh && n`List.notElem`elems || TL.null n
285 then
286 Seq.singleton $
287 element "artwork" $
288 xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
289 xmlify inh{inh_para=[]} ts
290 else
291 xmlify inh $
292 Tree (cell $ NodeHeader $ HeaderColon n wh) ts
293 --
294 HeaderGreat n _wh ->
295 Seq.singleton $
296 let (attrs,body) = partitionAttrs ts in
297 element "quote" $
298 xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <>
299 xmlify inh{inh_para=List.repeat elementPara} body
300 --
301 HeaderEqual n _wh ->
302 Seq.singleton $
303 Tree0 $ cell $ XmlAttr (xmlLocalName n) $
304 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
305 --
306 HeaderDot n ->
307 Seq.singleton $
308 element "li" $
309 let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in
310 xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <>
311 xmlify inh ts
312 --
313 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
314 --
315 HeaderDashDash ->
316 Seq.singleton $ Tree0 $ cell $
317 XmlComment $ Plain.writePlain ts
318 --
319 HeaderBrackets ident ->
320 let (attrs,body) = partitionAttrs ts in
321 Seq.singleton $
322 element "reference" $
323 xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) <>
324 xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body
325 where
326 inh' = inh{inh_figure = False}
327 --
328 HeaderDotSlash _file -> xmlify inh ts
329 ----------------------
330 NodePair pair ->
331 case pair of
332 PairBracket | to <- Plain.writePlain ts
333 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
334 Seq.singleton $
335 element "rref" $
336 xmlAttrs [cell ("to",to)]
337 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
338 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
339 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
340 PairFrenchquote ->
341 Seq.singleton $
342 element "q" $
343 case ts of
344 (Seq.viewl -> Tree0 (Cell sl (NodeToken (TokenText l))) :< ls) ->
345 case Seq.viewr ls of
346 m :> Tree0 (Cell sr (NodeToken (TokenText r))) ->
347 xmlify inh $
348 Tree0 (Cell sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
349 Seq.<|(m Seq.|>Tree0 (Cell sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r)))))
350 _ ->
351 xmlify inh $
352 Tree0 (Cell sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
353 (Seq.viewr -> rs :> Tree0 (Cell sr (NodeToken (TokenText r)))) ->
354 xmlify inh $
355 rs Seq.|> Tree0 (Cell sr (NodeToken (TokenText (TL.dropAround Char.isSpace r))))
356 _ -> xmlify inh ts
357 PairHash ->
358 Seq.singleton $
359 element "ref" $
360 xmlAttrs [cell ("to",Plain.writePlain ts)]
361 PairElem name attrs ->
362 Seq.singleton $
363 element (xmlLocalName name) $
364 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
365 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
366 xmlify inh ts
367 _ ->
368 Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XmlText open) `unionXml`
369 xmlify inh ts `unionXml`
370 Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XmlText close)
371 where
372 (open, close) = pairBorders pair ts
373 bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)}
374 en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)}
375 ----------------------
376 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
377 ----------------------
378 NodeToken tok ->
379 case tok of
380 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
381 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
382 TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)]
383 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
384 ----------------------
385 NodeLower n as ->
386 Seq.singleton $
387 element "artwork" $
388 xmlify inh ts
389 where
390 cell :: a -> Cell a
391 cell = Cell ss
392 element :: XmlName -> XMLs -> XML
393 element n = Tree (cell $ XmlElem n)
394 instance Xmlify (Seq (Cell (XmlName,TL.Text))) where
395 xmlify _inh = xmlAttrs
396
397 -- * Elements
398
399 -- | Reserved elements' name
400 elems :: Set TL.Text
401 elems =
402 [ "about"
403 , "abstract"
404 , "address"
405 , "alias"
406 , "annotation"
407 , "area"
408 , "artwork"
409 , "aside"
410 , "audio"
411 , "author"
412 , "authors"
413 , "bcp14"
414 , "br"
415 , "break"
416 , "call"
417 , "city"
418 , "code"
419 , "comment"
420 , "comments"
421 , "country"
422 , "date"
423 , "dd"
424 , "define"
425 , "del"
426 , "div"
427 , "dl"
428 , "document"
429 , "dt"
430 , "editor"
431 , "email"
432 , "embed"
433 , "eref"
434 , "fax"
435 , "feed"
436 , "feedback"
437 , "figure"
438 , "filter"
439 , "format"
440 , "from"
441 , "h"
442 , "hi"
443 , "html5"
444 , "i"
445 , "index"
446 , "iref"
447 , "keyword"
448 , "li"
449 , "link"
450 , "name"
451 , "note"
452 , "ol"
453 , "organization"
454 , "para"
455 , "postamble"
456 , "preamble"
457 , "q"
458 , "ref"
459 , "reference"
460 , "references"
461 , "region"
462 , "rref"
463 , "sc"
464 , "section"
465 , "serie"
466 , "source"
467 , "span"
468 , "street"
469 , "style"
470 , "sub"
471 , "sup"
472 , "table"
473 , "tbody"
474 , "td"
475 , "tel"
476 , "tfoot"
477 , "title"
478 , "th"
479 , "thead"
480 , "toc"
481 , "tof"
482 , "tr"
483 , "tt"
484 , "u"
485 , "ul"
486 , "uri"
487 , "version"
488 , "video"
489 , "workgroup"
490 , "xml"
491 , "zipcode"
492 ]
493
494 -- * Attributes
495
496 -- | Convenient alias, forcing the types
497 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
498 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
499
500 -- | Extract attributes
501 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
502 partitionAttrs ts = (attrs,cs)
503 where
504 (as,cs) = (`Seq.partition` ts) $ \case
505 Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n
506 _ -> False
507 attrs = attr <$> as
508 attr = \case
509 Tree (Cell ssn (NodeHeader (HeaderEqual n _wh))) a ->
510 Cell ssn (xmlLocalName n, v)
511 where
512 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
513 _ -> undefined
514
515 getAttrId :: Root -> TL.Text
516 getAttrId = Plain.writePlain . Seq.singleton
517
518 setAttr ::
519 Cell (XmlName, TL.Text) ->
520 Seq (Cell (XmlName, TL.Text)) ->
521 Seq (Cell (XmlName, TL.Text))
522 setAttr a@(unCell -> (k, _v)) as =
523 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
524 Just idx -> Seq.update idx a as
525 Nothing -> a <| as
526
527 defaultAttr ::
528 Seq (Cell (XmlName, TL.Text)) ->
529 Cell (XmlName, TL.Text) ->
530 Seq (Cell (XmlName, TL.Text))
531 defaultAttr as a@(unCell -> (k, _v)) =
532 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
533 Just _idx -> as
534 Nothing -> a <| as
535
536 -- * Text
537
538 -- | Unify two 'XMLs', merging border 'XmlText's if any.
539 unionXml :: XMLs -> XMLs -> XMLs
540 unionXml x y =
541 case (Seq.viewr x, Seq.viewl y) of
542 (xs :> x0, y0 :< ys) ->
543 case (x0,y0) of
544 ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XmlText tx))
545 , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XmlText ty)) ) | fx == fy ->
546 xs `unionXml`
547 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml`
548 ys
549 _ -> x <> y
550 (Seq.EmptyR, _) -> y
551 (_, Seq.EmptyL) -> x
552
553 unionsXml :: Foldable f => f XMLs -> XMLs
554 unionsXml = foldl' unionXml mempty