]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Add golden tests for DTC.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
21 import Data.Set (Set)
22 import Data.TreeSeq.Strict (Tree(..))
23 import Data.Tuple (uncurry)
24 import Prelude (Num(..), undefined)
25 import qualified Data.Char as Char
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text.Lazy as TL
29 import qualified Language.TCT.Write.Plain as Plain
30 import qualified System.FilePath as FP
31
32 -- import Language.TCT.Debug
33 import Language.TCT hiding (Parser)
34 import Language.XML
35 import Text.Blaze.XML ()
36
37 -- | Main entry point
38 --
39 -- NOTE: 'XmlNode' are still annotated with 'Cell',
40 -- but nothing is done to preserve any ordering amongst them,
41 -- because 'Node's sometimes need to be reordered
42 -- (eg. about/title may have a title from the section before,
43 -- hence outside of about).
44 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
45 document :: Roots -> XMLs
46 document doc =
47 -- (`S.evalState` def) $
48 case Seq.viewl doc of
49 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
50 let (titles, content) = partitionSection sec in
51 case Seq.viewl titles of
52 (unTree -> Cell bt et _) :< _ ->
53 xmlify def
54 { inh_titles = titles
55 , inh_figure = True
56 } contentWithAbout <>
57 xmlify def foot
58 where
59 contentWithAbout =
60 case Seq.findIndexL isAbout content of
61 Nothing -> Tree (Cell bt et $ NodeHeader $ HeaderColon "about" "") mempty <| content
62 Just{} -> content
63 isAbout = \case
64 (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True
65 _ -> False
66 _ -> xmlify def doc
67 _ -> xmlify def doc
68
69 partitionSection :: Root -> (Roots, Roots)
70 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
71 case Seq.viewl body of
72 EmptyL -> mempty
73 title@(unTree -> Cell _bt et NodePara) :< rest ->
74 let (subtitles, content) = spanlSubtitles et rest in
75 (title <| (subtitles >>= subTrees), content)
76 where
77 spanlSubtitles ep ts =
78 case Seq.viewl ts of
79 sub@(unTree -> Cell bs es (NodeHeader (HeaderSection lvlSub))) :< rs
80 | lvlSub <= lvlPar
81 , pos_line bs - pos_line ep <= 1 ->
82 let (subs, ts') = spanlSubtitles es rs in
83 (sub <| subs, ts')
84 _ -> (mempty, ts)
85 _ -> (mempty, body)
86 partitionSection _ = mempty
87
88 -- * Type 'Inh'
89 data Inh
90 = Inh
91 { inh_figure :: Bool
92 , inh_para :: [Cell () -> XMLs -> XML]
93 , inh_titles :: Roots
94 }
95 instance Default Inh where
96 def = Inh
97 { inh_figure = False
98 , inh_para = List.repeat xmlPara
99 , inh_titles = mempty
100 }
101
102 -- ** 'inh_para'
103 xmlPara :: Cell a -> XMLs -> XML
104 xmlPara c = Tree (XmlElem "para" <$ c)
105
106 xmlTitle :: Cell a -> XMLs -> XML
107 xmlTitle c = Tree (XmlElem "title" <$ c)
108
109 xmlName :: Cell a -> XMLs -> XML
110 xmlName c = Tree (XmlElem "name" <$ c)
111
112 -- * Class 'Xmlify'
113 class Xmlify a where
114 xmlify :: Inh -> a -> XMLs
115 instance Xmlify Roots where
116 xmlify inh roots =
117 case Seq.viewl roots of
118 EmptyL -> mempty
119 r@(Tree cr@(Cell _br _er nr) ts) :< rs ->
120 case nr of
121 ----------------------
122 -- NOTE: HeaderColon becomes parent
123 -- of any continuous following-sibling HeaderBar or HeaderGreat
124 NodeHeader (HeaderColon n _wh)
125 | (span, rest) <- spanlHeaderColon rs
126 , not $ null span ->
127 xmlify inh $ Tree cr (ts<>span) <| rest
128 where
129 spanlHeaderColon :: Roots -> (Roots, Roots)
130 spanlHeaderColon =
131 Seq.spanl $ \case
132 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
133 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
134 _ -> False
135 ----------------------
136 -- NOTE: gather HeaderBrackets
137 NodeHeader HeaderBrackets{}
138 | (span,rest) <- spanlBrackets roots
139 , not (null span) ->
140 (<| xmlify inh rest) $
141 element "references" $
142 span >>= xmlify inh
143 where
144 spanlBrackets :: Roots -> (Roots, Roots)
145 spanlBrackets =
146 Seq.spanl $ \case
147 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
148 _ -> False
149 ----------------------
150 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
151 NodeText x
152 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
153 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
154 ----------------------
155 -- NOTE: detect [some text](http://some.url) or [SomeRef]
156 NodePair PairParen
157 | Tree (Cell bb eb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
158 (<| xmlify inh rs') $
159 case bracket of
160 (toList -> [unTree -> Cell bl el (NodeToken (TokenLink lnk))]) ->
161 element "eref" $
162 xmlAttrs [Cell bl el ("to",lnk)] <>
163 xmlify inh ts
164 _ ->
165 element "rref" $
166 xmlAttrs [Cell bb eb ("to",Plain.document bracket)] <>
167 xmlify inh ts
168 ----------------------
169 -- NOTE: gather HeaderDash
170 _ | (span, rest) <- spanlItems (==HeaderDash) roots
171 , not $ null span ->
172 (<| xmlify inh rest) $
173 element "ul" $
174 span >>= xmlify inh{inh_para=List.repeat xmlPara}
175 ----------------------
176 -- NOTE: gather HeaderDot
177 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
178 , not $ null span ->
179 (<| xmlify inh rest) $
180 element "ol" $
181 span >>= xmlify inh{inh_para=List.repeat xmlPara}
182 where
183 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
184 spanlItems liHeader =
185 Seq.spanl $ \(unTree -> (unCell -> nod)) ->
186 case nod of
187 NodeHeader (HeaderColon "li" _wh) -> True
188 NodeHeader hdr -> liHeader hdr
189 NodePair (PairElem "li" _as) -> True
190 _ -> False
191 ----------------------
192 NodePara | para:inh_para <- inh_para inh ->
193 para (() <$ cr) (xmlify inh ts) <|
194 xmlify inh{inh_para} rs
195 ----------------------
196 -- NOTE: context-free Root
197 _ ->
198 xmlify inh r <>
199 xmlify inh rs
200 where
201 element :: XmlName -> XMLs -> XML
202 element n = Tree (XmlElem n <$ cr)
203 {-
204 t@(Tree (NodePair (PairElem))) :< ts ->
205 case inh_para inh of
206 [] -> xmlify inh t <> go inh ts
207 _ | isTokenElem toks -> xmlify inh t <> go inh ts
208 tree0:inh_para ->
209 (case Seq.viewl toks of
210 EmptyL -> id
211 (unTree -> cell_begin -> bp) :< _ -> (tree0 bp (xmlify inh t) <|)) $
212 go inh{inh_para} ts
213 -}
214 instance Xmlify Root where
215 xmlify inh tn@(Tree (Cell bn en nod) ts) =
216 case nod of
217 NodeGroup -> xmlify inh ts
218 ----------------------
219 NodePara -> xmlify inh ts
220 ----------------------
221 NodeHeader hdr ->
222 case hdr of
223 --
224 HeaderSection{} ->
225 Seq.singleton $
226 element "section" $ head <> xmlify inh' body
227 where
228 (titles, content) = partitionSection tn
229 (attrs, body) = partitionAttrs content
230 head =
231 case Seq.viewl titles of
232 EmptyL -> mempty
233 title@(unTree -> ct) :< subtitles ->
234 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
235 aliases
236 where
237 aliases =
238 subtitles >>= \subtitle@(unTree -> cs) ->
239 return $
240 Tree (cs $> XmlElem "alias") $
241 xmlAttrs (return $ cs $> ("id",getAttrId subtitle))
242 inh' = inh
243 { inh_para = xmlTitle : List.repeat xmlPara
244 , inh_figure = True
245 }
246 --
247 HeaderColon n _wh ->
248 let (attrs,body) = partitionAttrs ts in
249 case n of
250 -- NOTE: insert titles into <about>.
251 "about" ->
252 Seq.singleton $
253 element "about" $
254 (xmlify inh' $ inh_titles inh) <>
255 xmlAttrs attrs <>
256 xmlify inh' body
257 -- NOTE: in <figure> mode, unreserved nodes become <figure>
258 _ | inh_figure inh && n`List.notElem`elems || TL.null n ->
259 Seq.singleton $
260 element "figure" $
261 -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <>
262 xmlAttrs (attrs `defaultAttr` Cell bn bn ("type", n)) <>
263 case toList body of
264 [Tree0{}] -> xmlify inh'{inh_para = List.repeat xmlPara} body
265 _ -> xmlify inh'{inh_para = xmlTitle : List.repeat xmlPara} body
266 -- NOTE: reserved nodes
267 _ ->
268 Seq.singleton $
269 element (xmlLocalName n) $
270 xmlAttrs attrs <>
271 xmlify inh' ts
272 where
273 inh' = inh
274 { inh_para =
275 case n of
276 "about" -> List.repeat xmlTitle
277 "reference" -> xmlTitle : List.repeat xmlPara
278 "serie" -> List.repeat xmlName
279 "author" -> List.repeat xmlName
280 "editor" -> List.repeat xmlName
281 "org" -> List.repeat xmlName
282 _ -> []
283 }
284 ----------------------
285 HeaderBar n _wh ->
286 Seq.singleton $
287 element "artwork" $
288 xmlAttrs (Seq.singleton $ Cell bn bn ("type", n)) <>
289 xmlify inh{inh_para=[]} ts
290 ----------------------
291 HeaderGreat n _wh ->
292 Seq.singleton $
293 let (attrs,body) = partitionAttrs ts in
294 element "quote" $
295 xmlAttrs (attrs `defaultAttr` Cell bn bn ("type", n)) <>
296 xmlify inh{inh_para=List.repeat xmlPara} body
297 --
298 HeaderEqual n _wh -> Seq.singleton $ element (xmlLocalName n) $ xmlify inh ts
299 --
300 HeaderDot _n -> Seq.singleton $ element "li" $ xmlify inh ts
301 --
302 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
303 --
304 HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $
305 -- debug1_ ("TS", ts) $
306 -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $
307 Plain.document ts
308 -- Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing
309 {-
310 TreeSeq.mapAlsoNode
311 (cell1 . unCell)
312 (\_k -> fmap $
313 TreeSeq.mapAlsoNode
314 (cell1 . unCell)
315 (\_k' -> cell1 . unCell)) <$> ts
316 -}
317 --
318 HeaderBrackets ident ->
319 let (attrs,body) = partitionAttrs ts in
320 Seq.singleton $
321 element "reference" $
322 xmlAttrs (setAttr (Cell en en ("id",ident)) attrs) <>
323 xmlify inh'{inh_para = xmlTitle : xmlTitle : List.repeat xmlPara} body
324 where
325 inh' = inh{inh_figure = False}
326 --
327 HeaderDotSlash p ->
328 Seq.singleton $
329 element "include" $
330 xmlAttrs [cell ("href",TL.pack $ FP.replaceExtension p "dtc")] <>
331 xmlify inh ts
332 ----------------------
333 NodePair pair ->
334 case pair of
335 PairBracket | to <- Plain.document ts
336 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
337 Seq.singleton $
338 element "rref" $
339 xmlAttrs [cell ("to",to)]
340 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
341 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
342 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
343 PairFrenchquote ->
344 Seq.singleton $
345 element "q" $
346 xmlify inh ts
347 {-
348 case ts of
349 (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) ->
350 case Seq.viewr ls of
351 m :> Tree0 (Cell br er (TokenPlain r)) ->
352 xmlify inh $
353 Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)))
354 <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))))
355 _ ->
356 xmlify inh $
357 Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls
358 (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) ->
359 xmlify inh $
360 rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r)))
361 _ -> xmlify inh ts
362 -}
363 PairHash ->
364 Seq.singleton $
365 element "ref" $
366 xmlAttrs [cell ("to",Plain.document ts)]
367 PairElem name attrs ->
368 Seq.singleton $
369 element (xmlLocalName name) $
370 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
371 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
372 xmlify inh ts
373 _ ->
374 let (open, close) = pairBorders pair ts in
375 Seq.singleton (Tree0 $ Cell bn bn $ XmlText open) `unionXml`
376 xmlify inh ts `unionXml`
377 Seq.singleton (Tree0 $ Cell en en $ XmlText close)
378 ----------------------
379 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
380 ----------------------
381 NodeToken tok ->
382 case tok of
383 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
384 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
385 TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)]
386 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
387 ----------------------
388 NodeLower n as ->
389 Seq.singleton $
390 element "artwork" $
391 xmlify inh ts
392 where
393 cell :: a -> Cell a
394 cell = Cell bn en
395 element :: XmlName -> XMLs -> XML
396 element n = Tree (cell $ XmlElem n)
397 instance Xmlify (Seq (Cell (XmlName,TL.Text))) where
398 xmlify _inh = xmlAttrs
399
400 -- * Elements
401
402 -- | Reserved elements' name
403 elems :: Set TL.Text
404 elems =
405 [ "about"
406 , "abstract"
407 , "address"
408 , "alias"
409 , "annotation"
410 , "area"
411 , "artwork"
412 , "aside"
413 , "audio"
414 , "author"
415 , "authors"
416 , "bcp14"
417 , "br"
418 , "call"
419 , "city"
420 , "code"
421 , "comment"
422 , "comments"
423 , "country"
424 , "date"
425 , "dd"
426 , "define"
427 , "del"
428 , "div"
429 , "dl"
430 , "document"
431 , "dt"
432 , "editor"
433 , "email"
434 , "embed"
435 , "eref"
436 , "fax"
437 , "feed"
438 , "feedback"
439 , "figure"
440 , "filter"
441 , "format"
442 , "from"
443 , "h"
444 , "hi"
445 , "html5"
446 , "i"
447 , "index"
448 , "iref"
449 , "keyword"
450 , "li"
451 , "link"
452 , "name"
453 , "note"
454 , "ol"
455 , "organization"
456 , "para"
457 , "postamble"
458 , "preamble"
459 , "q"
460 , "ref"
461 , "reference"
462 , "references"
463 , "region"
464 , "rref"
465 , "sc"
466 , "section"
467 , "serie"
468 , "source"
469 , "span"
470 , "street"
471 , "style"
472 , "sub"
473 , "sup"
474 , "table"
475 , "tbody"
476 , "td"
477 , "tel"
478 , "tfoot"
479 , "title"
480 , "th"
481 , "thead"
482 , "toc"
483 , "tof"
484 , "tr"
485 , "tt"
486 , "u"
487 , "ul"
488 , "uri"
489 , "version"
490 , "video"
491 , "workgroup"
492 , "xml"
493 , "zipcode"
494 ]
495
496 -- * Attributes
497
498 -- | Convenient alias, forcing the types
499 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
500 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
501
502 -- | Extract attributes
503 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
504 partitionAttrs ts = (attrs,cs)
505 where
506 (as,cs) = (`Seq.partition` ts) $ \case
507 Tree (unCell -> NodeHeader (HeaderEqual (TL.null -> False) _wh)) _cs -> True
508 _ -> False
509 attrs = attr <$> as
510 attr = \case
511 Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a ->
512 Cell bp ep (xmlLocalName n, v)
513 where
514 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
515 _ -> undefined
516
517 getAttrId :: Root -> TL.Text
518 getAttrId = Plain.document . Seq.singleton
519
520 setAttr ::
521 Cell (XmlName, TL.Text) ->
522 Seq (Cell (XmlName, TL.Text)) ->
523 Seq (Cell (XmlName, TL.Text))
524 setAttr a@(unCell -> (k, _v)) as =
525 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
526 Just idx -> Seq.update idx a as
527 Nothing -> a <| as
528
529 defaultAttr ::
530 Seq (Cell (XmlName, TL.Text)) ->
531 Cell (XmlName, TL.Text) ->
532 Seq (Cell (XmlName, TL.Text))
533 defaultAttr as a@(unCell -> (k, _v)) =
534 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
535 Just _idx -> as
536 Nothing -> a <| as
537
538 -- * Text
539
540 -- | Unify two 'XMLs', merging border 'XmlText's if any.
541 unionXml :: XMLs -> XMLs -> XMLs
542 unionXml x y =
543 case (Seq.viewr x, Seq.viewl y) of
544 (xs :> x0, y0 :< ys) ->
545 case (x0,y0) of
546 ( Tree0 (Cell bx ex (XmlText tx))
547 , Tree0 (Cell by ey (XmlText ty)) ) ->
548 xs `unionXml`
549 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell bx ex tx <> Cell by ey ty) `unionXml`
550 ys
551 _ -> x <> y
552 (Seq.EmptyR, _) -> y
553 (_, Seq.EmptyL) -> x
554
555 unionsXml :: Foldable f => f XMLs -> XMLs
556 unionsXml = foldl' unionXml mempty