]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Fix writing TCT to XML.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
20 import Data.Set (Set)
21 import Data.TreeSeq.Strict (Tree(..))
22 import Data.Tuple (uncurry)
23 import Prelude (undefined)
24 import qualified Data.Char as Char
25 import qualified Data.List as List
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text.Lazy as TL
28 import qualified Language.TCT.Write.Plain as Plain
29 import qualified System.FilePath as FP
30
31 import Text.Blaze.XML ()
32 import Language.TCT hiding (Parser)
33 -- import Language.TCT.Debug
34 import Language.XML
35
36 -- | Main entry point
37 --
38 -- NOTE: 'XmlNode' are still annotated with 'Cell',
39 -- but nothing is done to preserve any ordering amongst them,
40 -- because 'Node's sometimes need to be reordered
41 -- (eg. about/title may have a title from the section before,
42 -- hence outside of about).
43 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
44 xmlDocument :: Roots -> XMLs
45 xmlDocument doc =
46 -- (`S.evalState` def) $
47 case Seq.viewl doc of
48 Tree (unCell -> NodeHeader HeaderSection{}) body :< foot ->
49 case Seq.viewl body of
50 title@(unTree -> Cell bt et NodePara{}) :< content ->
51 xmlify def
52 { inh_titles = return title
53 , inh_figure = True
54 } contentWithAbout <>
55 xmlify def foot
56 where
57 contentWithAbout =
58 case Seq.findIndexL isAbout content of
59 Nothing -> Tree (Cell bt et $ NodeHeader $ HeaderColon "about" "") mempty <| content
60 Just{} -> content
61 isAbout = \case
62 (unTree -> unCell -> NodeHeader (HeaderColon "about" _wh)) -> True
63 _ -> False
64 _ -> xmlify def doc
65 _ -> xmlify def doc
66
67 {-
68 -- * Type 'Xmls'
69 type Xmls = S.State State XMLs
70 type Xml = S.State State XML
71 instance Semigroup Xmls where
72 (<>) = liftA2 (<>)
73 instance Monoid Xmls where
74 mempty = return mempty
75 mappend = (<>)
76
77 -- * Type 'State'
78 data State
79 = State
80 { state_pos :: Pos
81 }
82 instance Default State where
83 def = State
84 { state_pos = pos1
85 }
86 -}
87
88 -- * Type 'Inh'
89 data Inh
90 = Inh
91 { inh_figure :: Bool
92 , inh_para :: [Cell () -> XMLs -> XML]
93 , inh_titles :: Roots
94 }
95 instance Default Inh where
96 def = Inh
97 { inh_figure = False
98 , inh_para = List.repeat xmlPara
99 , inh_titles = mempty
100 }
101
102 -- ** 'inh_para'
103 xmlPara :: Cell a -> XMLs -> XML
104 xmlPara c = Tree (XmlElem "para" <$ c)
105
106 xmlTitle :: Cell a -> XMLs -> XML
107 xmlTitle c = Tree (XmlElem "title" <$ c)
108
109 xmlName :: Cell a -> XMLs -> XML
110 xmlName c = Tree (XmlElem "name" <$ c)
111
112 -- * Class 'Xmlify'
113 class Xmlify a where
114 xmlify :: Inh -> a -> XMLs
115 instance Xmlify Roots where
116 xmlify inh roots =
117 case Seq.viewl roots of
118 EmptyL -> mempty
119 r@(Tree cr@(Cell _br _er nr) ts) :< rs ->
120 case nr of
121 ----------------------
122 -- NOTE: HeaderColon becomes parent
123 -- of any continuous following-sibling HeaderBar or HeaderGreat
124 NodeHeader (HeaderColon n _wh)
125 | (span, rest) <- spanlHeaderColon rs
126 , not $ null span ->
127 xmlify inh $ Tree cr (ts<>span) <| rest
128 where
129 spanlHeaderColon :: Roots -> (Roots, Roots)
130 spanlHeaderColon =
131 Seq.spanl $ \case
132 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
133 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
134 _ -> False
135 ----------------------
136 -- NOTE: gather HeaderBrackets
137 NodeHeader HeaderBrackets{}
138 | (span,rest) <- spanlBrackets roots
139 , not (null span) ->
140 (<| xmlify inh rest) $
141 element "references" $
142 span >>= xmlify inh
143 where
144 spanlBrackets :: Roots -> (Roots, Roots)
145 spanlBrackets =
146 Seq.spanl $ \case
147 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
148 _ -> False
149 ----------------------
150 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
151 NodeText x
152 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
153 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
154 ----------------------
155 -- NOTE: detect [some text](http://some.url) or [SomeRef]
156 NodePair PairParen
157 | Tree (Cell bb eb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
158 (<| xmlify inh rs') $
159 case bracket of
160 (toList -> [unTree -> Cell bl el (NodeToken (TokenLink lnk))]) ->
161 element "eref" $
162 xmlAttrs [Cell bl el ("to",lnk)] <>
163 xmlify inh ts
164 _ ->
165 element "rref" $
166 xmlAttrs [Cell bb eb ("to",Plain.plainDocument bracket)] <>
167 xmlify inh ts
168 ----------------------
169 -- NOTE: gather HeaderDash
170 _ | (span, rest) <- spanlItems (==HeaderDash) roots
171 , not $ null span ->
172 (<| xmlify inh rest) $
173 element "ul" $
174 span >>= xmlify inh{inh_para=List.repeat xmlPara}
175 ----------------------
176 -- NOTE: gather HeaderDot
177 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
178 , not $ null span ->
179 (<| xmlify inh rest) $
180 element "ol" $
181 span >>= xmlify inh{inh_para=List.repeat xmlPara}
182 where
183 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
184 spanlItems liHeader =
185 Seq.spanl $ \(unTree -> unCell -> nod) ->
186 case nod of
187 NodeHeader (HeaderColon "li" _wh) -> True
188 NodeHeader hdr -> liHeader hdr
189 NodePair (PairElem "li" _as) -> True
190 _ -> False
191 ----------------------
192 -- NOTE: context-free Root
193 _ ->
194 xmlify inh r <>
195 xmlify inh rs
196 where
197 element :: XmlName -> XMLs -> XML
198 element n = Tree (XmlElem n <$ cr)
199 {-
200 t@(Tree (NodePair (PairElem))) :< ts ->
201 case inh_para inh of
202 [] -> xmlify inh t <> go inh ts
203 _ | isTokenElem toks -> xmlify inh t <> go inh ts
204 tree0:inh_para ->
205 (case Seq.viewl toks of
206 EmptyL -> id
207 (unTree -> cell_begin -> bp) :< _ -> (tree0 bp (xmlify inh t) <|)) $
208 go inh{inh_para} ts
209 -}
210 instance Xmlify Root where
211 xmlify inh (Tree cel@(Cell bp ep nod) ts) =
212 case nod of
213 NodeGroup -> xmlify inh ts
214 ----------------------
215 NodePara ->
216 case inh_para inh of
217 [] -> xmlify inh ts
218 para:inh_para ->
219 Seq.singleton $
220 para (() <$ cel) $
221 xmlify inh{inh_para} ts
222 ----------------------
223 NodeHeader hdr ->
224 case hdr of
225 --
226 HeaderSection{} ->
227 let (attrs,body) = partitionAttrs ts in
228 Seq.singleton $
229 element "section" $
230 xmlAttrs (attrs `defaultAttr` Cell ep ep ("id",getAttrId body)) <>
231 xmlify inh' body
232 where
233 inh' = inh
234 { inh_para = xmlTitle : List.repeat xmlPara
235 , inh_figure = True
236 }
237 --
238 HeaderColon n _wh ->
239 let (attrs,body) = partitionAttrs ts in
240 case n of
241 -- NOTE: insert titles into <about>.
242 "about" ->
243 Seq.singleton $
244 element "about" $
245 (inh_titles inh >>= xmlify inh') <>
246 xmlAttrs attrs <>
247 xmlify inh body
248 -- NOTE: in <figure> mode, unreserved nodes become <figure>
249 _ | inh_figure inh && not (n`List.elem`elems) ->
250 Seq.singleton $
251 element "figure" $
252 xmlAttrs (setAttr (Cell ep ep ("type",n)) attrs) <>
253 case toList body of
254 [Tree0{}] -> xmlify inh'{inh_para = List.repeat xmlPara} body
255 _ -> xmlify inh'{inh_para = xmlTitle : List.repeat xmlPara} body
256 -- NOTE: reserved nodes
257 _ ->
258 Seq.singleton $
259 element (xmlLocalName n) $
260 xmlAttrs attrs <>
261 xmlify inh' ts
262 where
263 inh' = inh
264 { inh_para =
265 case n of
266 "about" -> xmlTitle : List.repeat xmlPara
267 "reference" -> xmlTitle : List.repeat xmlPara
268 "serie" -> List.repeat xmlName
269 "author" -> List.repeat xmlName
270 "editor" -> List.repeat xmlName
271 "org" -> List.repeat xmlName
272 _ -> []
273 }
274 ----------------------
275 HeaderBar n _wh ->
276 Seq.singleton $
277 element "artwork" $
278 xmlAttrs (Seq.singleton $ Cell bp bp ("type", if TL.null n then "txt" else n)) <>
279 xmlify inh{inh_para=[]} ts
280 ----------------------
281 HeaderGreat n _wh ->
282 Seq.singleton $
283 let (attrs,body) = partitionAttrs ts in
284 element "artwork" $
285 xmlAttrs (attrs `defaultAttr` Cell bp bp ("type", if TL.null n then "quote" else n)) <>
286 xmlify inh{inh_para=[]} body
287 --
288 HeaderEqual n _wh -> Seq.singleton $ element (xmlLocalName n) $ xmlify inh ts
289 --
290 HeaderDot _n -> Seq.singleton $ element "li" $ xmlify inh ts
291 --
292 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
293 --
294 HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $
295 -- debug1_ ("TS", ts) $
296 -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $
297 Plain.plainDocument ts
298 -- Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing
299 {-
300 TreeSeq.mapAlsoNode
301 (cell1 . unCell)
302 (\_k -> fmap $
303 TreeSeq.mapAlsoNode
304 (cell1 . unCell)
305 (\_k' -> cell1 . unCell)) <$> ts
306 -}
307 --
308 HeaderBrackets ident ->
309 let (attrs,body) = partitionAttrs ts in
310 Seq.singleton $
311 element "reference" $
312 xmlAttrs (setAttr (Cell ep ep ("id",ident)) attrs) <>
313 xmlify inh'{inh_para = xmlTitle : xmlTitle : List.repeat xmlPara} body
314 where
315 inh' = inh{inh_figure = False}
316 --
317 HeaderDotSlash p ->
318 Seq.singleton $
319 element "include" $
320 xmlAttrs [cell ("href",TL.pack $ FP.replaceExtension p "dtc")] <>
321 xmlify inh ts
322 ----------------------
323 NodePair pair ->
324 case pair of
325 PairBracket | to <- Plain.plainDocument ts
326 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
327 Seq.singleton $
328 element "rref" $
329 xmlAttrs [cell ("to",to)]
330 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
331 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
332 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
333 PairFrenchquote ->
334 Seq.singleton $
335 element "q" $
336 xmlify inh ts
337 {-
338 case ts of
339 (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) ->
340 case Seq.viewr ls of
341 m :> Tree0 (Cell br er (TokenPlain r)) ->
342 xmlify inh $
343 Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)))
344 <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))))
345 _ ->
346 xmlify inh $
347 Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls
348 (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) ->
349 xmlify inh $
350 rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r)))
351 _ -> xmlify inh ts
352 -}
353 PairHash ->
354 Seq.singleton $
355 element "ref" $
356 xmlAttrs [cell ("to",Plain.plainDocument ts)]
357 PairElem name attrs ->
358 Seq.singleton $
359 element (xmlLocalName name) $
360 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
361 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
362 xmlify inh ts
363 _ ->
364 let (open, close) = pairBorders pair ts in
365 Seq.singleton (Tree0 $ Cell bp bp $ XmlText open) `unionXml`
366 xmlify inh ts `unionXml`
367 Seq.singleton (Tree0 $ Cell ep ep $ XmlText close)
368 ----------------------
369 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
370 ----------------------
371 NodeToken tok ->
372 case tok of
373 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
374 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
375 TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)]
376 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
377 ----------------------
378 NodeLower n as ->
379 Seq.singleton $
380 element "artwork" $
381 xmlify inh ts
382 where
383 cell :: a -> Cell a
384 cell = Cell bp ep
385 element :: XmlName -> XMLs -> XML
386 element n = Tree (cell $ XmlElem n)
387 instance Xmlify (Seq (Cell (XmlName,TL.Text))) where
388 xmlify _inh = xmlAttrs
389
390 -- * Elements
391
392 -- | Reserved elements' name
393 elems :: Set TL.Text
394 elems =
395 [ "about"
396 , "abstract"
397 , "address"
398 , "alias"
399 , "annotation"
400 , "area"
401 , "artwork"
402 , "aside"
403 , "audio"
404 , "author"
405 , "authors"
406 , "bcp14"
407 , "br"
408 , "call"
409 , "city"
410 , "code"
411 , "comment"
412 , "comments"
413 , "country"
414 , "date"
415 , "dd"
416 , "define"
417 , "del"
418 , "div"
419 , "dl"
420 , "document"
421 , "dt"
422 , "editor"
423 , "email"
424 , "embed"
425 , "eref"
426 , "fax"
427 , "feed"
428 , "feedback"
429 , "figure"
430 , "filter"
431 , "format"
432 , "from"
433 , "h"
434 , "hi"
435 , "html5"
436 , "i"
437 , "index"
438 , "iref"
439 , "keyword"
440 , "li"
441 , "link"
442 , "name"
443 , "note"
444 , "ol"
445 , "organization"
446 , "para"
447 , "postamble"
448 , "preamble"
449 , "q"
450 , "ref"
451 , "reference"
452 , "references"
453 , "region"
454 , "rref"
455 , "sc"
456 , "section"
457 , "serie"
458 , "source"
459 , "span"
460 , "street"
461 , "style"
462 , "sub"
463 , "sup"
464 , "table"
465 , "tbody"
466 , "td"
467 , "tel"
468 , "tfoot"
469 , "title"
470 , "th"
471 , "thead"
472 , "toc"
473 , "tof"
474 , "tr"
475 , "tt"
476 , "u"
477 , "ul"
478 , "uri"
479 , "version"
480 , "video"
481 , "workgroup"
482 , "xml"
483 , "zipcode"
484 ]
485
486 -- * Attributes
487
488 -- | Convenient alias, forcing the types
489 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
490 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
491
492 -- | Extract attributes
493 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
494 partitionAttrs ts = (attrs,cs)
495 where
496 (as,cs) = (`Seq.partition` ts) $ \case
497 Tree (unCell -> NodeHeader (HeaderEqual (TL.null -> False) _wh)) _cs -> True
498 _ -> False
499 attrs = attr <$> as
500 attr = \case
501 Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a ->
502 Cell bp ep (xmlLocalName n, v)
503 where
504 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
505 _ -> undefined
506
507 getAttrId :: Roots -> TL.Text
508 getAttrId ts =
509 case Seq.viewl ts of
510 EmptyL -> ""
511 t :< _ -> Plain.plainDocument $ Seq.singleton t
512
513 setAttr ::
514 Cell (XmlName, TL.Text) ->
515 Seq (Cell (XmlName, TL.Text)) ->
516 Seq (Cell (XmlName, TL.Text))
517 setAttr a@(unCell -> (k, _v)) as =
518 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
519 Just idx -> Seq.update idx a as
520 Nothing -> a <| as
521
522 defaultAttr ::
523 Seq (Cell (XmlName, TL.Text)) ->
524 Cell (XmlName, TL.Text) ->
525 Seq (Cell (XmlName, TL.Text))
526 defaultAttr as a@(unCell -> (k, _v)) =
527 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
528 Just _idx -> as
529 Nothing -> a <| as
530
531 -- * Text
532
533 -- | Unify two 'XMLs', merging border 'XmlText's if any.
534 unionXml :: XMLs -> XMLs -> XMLs
535 unionXml x y =
536 case (Seq.viewr x, Seq.viewl y) of
537 (xs :> x0, y0 :< ys) ->
538 case (x0,y0) of
539 ( Tree0 (Cell bx ex (XmlText tx))
540 , Tree0 (Cell by ey (XmlText ty)) ) ->
541 xs `unionXml`
542 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell bx ex tx <> Cell by ey ty) `unionXml`
543 ys
544 _ -> x <> y
545 (Seq.EmptyR, _) -> y
546 (_, Seq.EmptyL) -> x
547
548 unionsXml :: Foldable f => f XMLs -> XMLs
549 unionsXml = foldl' unionXml mempty