]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Add golden tests.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
21 import Data.Set (Set)
22 import Data.TreeSeq.Strict (Tree(..))
23 import Data.Tuple (uncurry)
24 import Prelude (Num(..), undefined)
25 import qualified Data.Char as Char
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text.Lazy as TL
29 import qualified Language.TCT.Write.Plain as Plain
30 import qualified System.FilePath as FP
31
32 -- import Language.TCT.Debug
33 import Language.TCT hiding (Parser)
34 import Language.XML
35 import Text.Blaze.XML ()
36
37 -- | Main entry point
38 --
39 -- NOTE: 'XmlNode' are still annotated with 'Cell',
40 -- but nothing is done to preserve any ordering amongst them,
41 -- because 'Node's sometimes need to be reordered
42 -- (eg. about/title may have a title from the section before,
43 -- hence outside of about).
44 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
45 document :: Roots -> XMLs
46 document doc =
47 -- (`S.evalState` def) $
48 case Seq.viewl doc of
49 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
50 let (titles, content) = partitionSection sec in
51 case Seq.viewl titles of
52 (unTree -> Cell bt et _) :< _ ->
53 xmlify def
54 { inh_titles = titles
55 , inh_figure = True
56 } contentWithAbout <>
57 xmlify def foot
58 where
59 contentWithAbout =
60 case Seq.findIndexL isAbout content of
61 Nothing -> Tree (Cell bt et $ NodeHeader $ HeaderColon "about" "") mempty <| content
62 Just{} -> content
63 isAbout = \case
64 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
65 _ -> False
66 _ -> xmlify def doc
67 _ -> xmlify def doc
68
69 partitionSection :: Root -> (Roots, Roots)
70 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
71 case Seq.viewl body of
72 EmptyL -> mempty
73 title@(unTree -> Cell _bt et NodePara) :< rest ->
74 let (subtitles, content) = spanlSubtitles et rest in
75 (title <| (subtitles >>= subTrees), content)
76 where
77 spanlSubtitles ep ts =
78 case Seq.viewl ts of
79 sub@(unTree -> Cell bs es (NodeHeader (HeaderSection lvlSub))) :< rs
80 | lvlSub <= lvlPar
81 , pos_line bs - pos_line ep <= 1 ->
82 let (subs, ts') = spanlSubtitles es rs in
83 (sub <| subs, ts')
84 _ -> (mempty, ts)
85 partitionSection _ = mempty
86
87 -- * Type 'Inh'
88 data Inh
89 = Inh
90 { inh_figure :: Bool
91 , inh_para :: [Cell () -> XMLs -> XML]
92 , inh_titles :: Roots
93 }
94 instance Default Inh where
95 def = Inh
96 { inh_figure = False
97 , inh_para = List.repeat xmlPara
98 , inh_titles = mempty
99 }
100
101 -- ** 'inh_para'
102 xmlPara :: Cell a -> XMLs -> XML
103 xmlPara c = Tree (XmlElem "para" <$ c)
104
105 xmlTitle :: Cell a -> XMLs -> XML
106 xmlTitle c = Tree (XmlElem "title" <$ c)
107
108 xmlName :: Cell a -> XMLs -> XML
109 xmlName c = Tree (XmlElem "name" <$ c)
110
111 -- * Class 'Xmlify'
112 class Xmlify a where
113 xmlify :: Inh -> a -> XMLs
114 instance Xmlify Roots where
115 xmlify inh roots =
116 case Seq.viewl roots of
117 EmptyL -> mempty
118 r@(Tree cr@(Cell _br _er nr) ts) :< rs ->
119 case nr of
120 ----------------------
121 -- NOTE: HeaderColon becomes parent
122 -- of any continuous following-sibling HeaderBar or HeaderGreat
123 NodeHeader (HeaderColon n _wh)
124 | (span, rest) <- spanlHeaderColon rs
125 , not $ null span ->
126 xmlify inh $ Tree cr (ts<>span) <| rest
127 where
128 spanlHeaderColon :: Roots -> (Roots, Roots)
129 spanlHeaderColon =
130 Seq.spanl $ \case
131 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
132 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
133 _ -> False
134 ----------------------
135 -- NOTE: gather HeaderBrackets
136 NodeHeader HeaderBrackets{}
137 | (span,rest) <- spanlBrackets roots
138 , not (null span) ->
139 (<| xmlify inh rest) $
140 element "references" $
141 span >>= xmlify inh
142 where
143 spanlBrackets :: Roots -> (Roots, Roots)
144 spanlBrackets =
145 Seq.spanl $ \case
146 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
147 _ -> False
148 ----------------------
149 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
150 NodeText x
151 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
152 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
153 ----------------------
154 -- NOTE: detect [some text](http://some.url) or [SomeRef]
155 NodePair PairParen
156 | Tree (Cell bb eb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
157 (<| xmlify inh rs') $
158 case bracket of
159 (toList -> [unTree -> Cell bl el (NodeToken (TokenLink lnk))]) ->
160 element "eref" $
161 xmlAttrs [Cell bl el ("to",lnk)] <>
162 xmlify inh ts
163 _ ->
164 element "rref" $
165 xmlAttrs [Cell bb eb ("to",Plain.document bracket)] <>
166 xmlify inh ts
167 ----------------------
168 -- NOTE: gather HeaderDash
169 _ | (span, rest) <- spanlItems (==HeaderDash) roots
170 , not $ null span ->
171 (<| xmlify inh rest) $
172 element "ul" $
173 span >>= xmlify inh{inh_para=List.repeat xmlPara}
174 ----------------------
175 -- NOTE: gather HeaderDot
176 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
177 , not $ null span ->
178 (<| xmlify inh rest) $
179 element "ol" $
180 span >>= xmlify inh{inh_para=List.repeat xmlPara}
181 where
182 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
183 spanlItems liHeader =
184 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
185 case nod of
186 NodeHeader (HeaderColon "li" _wh) -> True
187 NodeHeader hdr -> liHeader hdr
188 NodePair (PairElem "li" _as) -> True
189 _ -> False
190 ----------------------
191 -- NOTE: context-free Root
192 _ ->
193 xmlify inh r <>
194 xmlify inh rs
195 where
196 element :: XmlName -> XMLs -> XML
197 element n = Tree (XmlElem n <$ cr)
198 {-
199 t@(Tree (NodePair (PairElem))) :< ts ->
200 case inh_para inh of
201 [] -> xmlify inh t <> go inh ts
202 _ | isTokenElem toks -> xmlify inh t <> go inh ts
203 tree0:inh_para ->
204 (case Seq.viewl toks of
205 EmptyL -> id
206 (unTree -> cell_begin -> bp) :< _ -> (tree0 bp (xmlify inh t) <|)) $
207 go inh{inh_para} ts
208 -}
209 instance Xmlify Root where
210 xmlify inh tr@(Tree cel@(Cell bp ep nod) ts) =
211 case nod of
212 NodeGroup -> xmlify inh ts
213 ----------------------
214 NodePara ->
215 case inh_para inh of
216 [] -> xmlify inh ts
217 para:inh_para ->
218 Seq.singleton $
219 para (() <$ cel) $
220 xmlify inh{inh_para} ts
221 ----------------------
222 NodeHeader hdr ->
223 case hdr of
224 --
225 HeaderSection{} ->
226 Seq.singleton $
227 element "section" $ head <> xmlify inh' body
228 where
229 (titles, content) = partitionSection tr
230 (attrs, body) = partitionAttrs content
231 head =
232 case Seq.viewl titles of
233 EmptyL -> mempty
234 title@(unTree -> ct) :< subtitles ->
235 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
236 aliases
237 where
238 aliases =
239 subtitles >>= \subtitle@(unTree -> cs) ->
240 return $
241 Tree (cs $> XmlElem "alias") $
242 xmlAttrs (return $ cs $> ("id",getAttrId subtitle))
243 inh' = inh
244 { inh_para = xmlTitle : List.repeat xmlPara
245 , inh_figure = True
246 }
247 --
248 HeaderColon n _wh ->
249 let (attrs,body) = partitionAttrs ts in
250 case n of
251 -- NOTE: insert titles into <about>.
252 "about" ->
253 Seq.singleton $
254 element "about" $
255 (inh_titles inh >>= xmlify inh') <>
256 xmlAttrs attrs <>
257 xmlify inh body
258 -- NOTE: in <figure> mode, unreserved nodes become <figure>
259 _ | inh_figure inh && n`List.notElem`elems ->
260 Seq.singleton $
261 element "figure" $
262 -- xmlAttrs (setAttr (Cell ep ep ("type",n)) attrs) <>
263 xmlAttrs (attrs `defaultAttr` Cell bp bp ("type", n)) <>
264 case toList body of
265 [Tree0{}] -> xmlify inh'{inh_para = List.repeat xmlPara} body
266 _ -> xmlify inh'{inh_para = xmlTitle : List.repeat xmlPara} body
267 -- NOTE: reserved nodes
268 _ ->
269 Seq.singleton $
270 element (xmlLocalName n) $
271 xmlAttrs attrs <>
272 xmlify inh' ts
273 where
274 inh' = inh
275 { inh_para =
276 case n of
277 "about" -> xmlTitle : List.repeat xmlPara
278 "reference" -> xmlTitle : List.repeat xmlPara
279 "serie" -> List.repeat xmlName
280 "author" -> List.repeat xmlName
281 "editor" -> List.repeat xmlName
282 "org" -> List.repeat xmlName
283 _ -> []
284 }
285 ----------------------
286 HeaderBar n _wh ->
287 Seq.singleton $
288 element "artwork" $
289 xmlAttrs (Seq.singleton $ Cell bp bp ("type", if TL.null n then "txt" else n)) <>
290 xmlify inh{inh_para=[]} ts
291 ----------------------
292 HeaderGreat n _wh ->
293 Seq.singleton $
294 let (attrs,body) = partitionAttrs ts in
295 element "artwork" $
296 xmlAttrs (attrs `defaultAttr` Cell bp bp ("type", if TL.null n then "quote" else n)) <>
297 xmlify inh{inh_para=[]} body
298 --
299 HeaderEqual n _wh -> Seq.singleton $ element (xmlLocalName n) $ xmlify inh ts
300 --
301 HeaderDot _n -> Seq.singleton $ element "li" $ xmlify inh ts
302 --
303 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
304 --
305 HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $
306 -- debug1_ ("TS", ts) $
307 -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $
308 Plain.document ts
309 -- Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing
310 {-
311 TreeSeq.mapAlsoNode
312 (cell1 . unCell)
313 (\_k -> fmap $
314 TreeSeq.mapAlsoNode
315 (cell1 . unCell)
316 (\_k' -> cell1 . unCell)) <$> ts
317 -}
318 --
319 HeaderBrackets ident ->
320 let (attrs,body) = partitionAttrs ts in
321 Seq.singleton $
322 element "reference" $
323 xmlAttrs (setAttr (Cell ep ep ("id",ident)) attrs) <>
324 xmlify inh'{inh_para = xmlTitle : xmlTitle : List.repeat xmlPara} body
325 where
326 inh' = inh{inh_figure = False}
327 --
328 HeaderDotSlash p ->
329 Seq.singleton $
330 element "include" $
331 xmlAttrs [cell ("href",TL.pack $ FP.replaceExtension p "dtc")] <>
332 xmlify inh ts
333 ----------------------
334 NodePair pair ->
335 case pair of
336 PairBracket | to <- Plain.document ts
337 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
338 Seq.singleton $
339 element "rref" $
340 xmlAttrs [cell ("to",to)]
341 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
342 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
343 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
344 PairFrenchquote ->
345 Seq.singleton $
346 element "q" $
347 xmlify inh ts
348 {-
349 case ts of
350 (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) ->
351 case Seq.viewr ls of
352 m :> Tree0 (Cell br er (TokenPlain r)) ->
353 xmlify inh $
354 Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)))
355 <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))))
356 _ ->
357 xmlify inh $
358 Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls
359 (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) ->
360 xmlify inh $
361 rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r)))
362 _ -> xmlify inh ts
363 -}
364 PairHash ->
365 Seq.singleton $
366 element "ref" $
367 xmlAttrs [cell ("to",Plain.document ts)]
368 PairElem name attrs ->
369 Seq.singleton $
370 element (xmlLocalName name) $
371 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
372 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
373 xmlify inh ts
374 _ ->
375 let (open, close) = pairBorders pair ts in
376 Seq.singleton (Tree0 $ Cell bp bp $ XmlText open) `unionXml`
377 xmlify inh ts `unionXml`
378 Seq.singleton (Tree0 $ Cell ep ep $ XmlText close)
379 ----------------------
380 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
381 ----------------------
382 NodeToken tok ->
383 case tok of
384 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
385 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
386 TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)]
387 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
388 ----------------------
389 NodeLower n as ->
390 Seq.singleton $
391 element "artwork" $
392 xmlify inh ts
393 where
394 cell :: a -> Cell a
395 cell = Cell bp ep
396 element :: XmlName -> XMLs -> XML
397 element n = Tree (cell $ XmlElem n)
398 instance Xmlify (Seq (Cell (XmlName,TL.Text))) where
399 xmlify _inh = xmlAttrs
400
401 -- * Elements
402
403 -- | Reserved elements' name
404 elems :: Set TL.Text
405 elems =
406 [ "about"
407 , "abstract"
408 , "address"
409 , "alias"
410 , "annotation"
411 , "area"
412 , "artwork"
413 , "aside"
414 , "audio"
415 , "author"
416 , "authors"
417 , "bcp14"
418 , "br"
419 , "call"
420 , "city"
421 , "code"
422 , "comment"
423 , "comments"
424 , "country"
425 , "date"
426 , "dd"
427 , "define"
428 , "del"
429 , "div"
430 , "dl"
431 , "document"
432 , "dt"
433 , "editor"
434 , "email"
435 , "embed"
436 , "eref"
437 , "fax"
438 , "feed"
439 , "feedback"
440 , "figure"
441 , "filter"
442 , "format"
443 , "from"
444 , "h"
445 , "hi"
446 , "html5"
447 , "i"
448 , "index"
449 , "iref"
450 , "keyword"
451 , "li"
452 , "link"
453 , "name"
454 , "note"
455 , "ol"
456 , "organization"
457 , "para"
458 , "postamble"
459 , "preamble"
460 , "q"
461 , "ref"
462 , "reference"
463 , "references"
464 , "region"
465 , "rref"
466 , "sc"
467 , "section"
468 , "serie"
469 , "source"
470 , "span"
471 , "street"
472 , "style"
473 , "sub"
474 , "sup"
475 , "table"
476 , "tbody"
477 , "td"
478 , "tel"
479 , "tfoot"
480 , "title"
481 , "th"
482 , "thead"
483 , "toc"
484 , "tof"
485 , "tr"
486 , "tt"
487 , "u"
488 , "ul"
489 , "uri"
490 , "version"
491 , "video"
492 , "workgroup"
493 , "xml"
494 , "zipcode"
495 ]
496
497 -- * Attributes
498
499 -- | Convenient alias, forcing the types
500 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
501 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
502
503 -- | Extract attributes
504 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
505 partitionAttrs ts = (attrs,cs)
506 where
507 (as,cs) = (`Seq.partition` ts) $ \case
508 Tree (unCell -> NodeHeader (HeaderEqual (TL.null -> False) _wh)) _cs -> True
509 _ -> False
510 attrs = attr <$> as
511 attr = \case
512 Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a ->
513 Cell bp ep (xmlLocalName n, v)
514 where
515 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
516 _ -> undefined
517
518 getAttrId :: Root -> TL.Text
519 getAttrId = Plain.document . Seq.singleton
520
521 setAttr ::
522 Cell (XmlName, TL.Text) ->
523 Seq (Cell (XmlName, TL.Text)) ->
524 Seq (Cell (XmlName, TL.Text))
525 setAttr a@(unCell -> (k, _v)) as =
526 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
527 Just idx -> Seq.update idx a as
528 Nothing -> a <| as
529
530 defaultAttr ::
531 Seq (Cell (XmlName, TL.Text)) ->
532 Cell (XmlName, TL.Text) ->
533 Seq (Cell (XmlName, TL.Text))
534 defaultAttr as a@(unCell -> (k, _v)) =
535 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
536 Just _idx -> as
537 Nothing -> a <| as
538
539 -- * Text
540
541 -- | Unify two 'XMLs', merging border 'XmlText's if any.
542 unionXml :: XMLs -> XMLs -> XMLs
543 unionXml x y =
544 case (Seq.viewr x, Seq.viewl y) of
545 (xs :> x0, y0 :< ys) ->
546 case (x0,y0) of
547 ( Tree0 (Cell bx ex (XmlText tx))
548 , Tree0 (Cell by ey (XmlText ty)) ) ->
549 xs `unionXml`
550 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell bx ex tx <> Cell by ey ty) `unionXml`
551 ys
552 _ -> x <> y
553 (Seq.EmptyR, _) -> y
554 (_, Seq.EmptyL) -> x
555
556 unionsXml :: Foldable f => f XMLs -> XMLs
557 unionsXml = foldl' unionXml mempty