]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Fix parsing HeaderSection.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
21 import Data.Set (Set)
22 import Data.TreeSeq.Strict (Tree(..))
23 import Data.Tuple (uncurry)
24 import Prelude (Num(..), undefined)
25 import qualified Data.Char as Char
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text.Lazy as TL
29 import qualified Language.TCT.Write.Plain as Plain
30 import qualified System.FilePath as FP
31
32 import Text.Blaze.XML ()
33 import Language.TCT hiding (Parser)
34 import Language.TCT.Debug
35 import Language.XML
36
37 -- | Main entry point
38 --
39 -- NOTE: 'XmlNode' are still annotated with 'Cell',
40 -- but nothing is done to preserve any ordering amongst them,
41 -- because 'Node's sometimes need to be reordered
42 -- (eg. about/title may have a title from the section before,
43 -- hence outside of about).
44 -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting.
45 xmlDocument :: Roots -> XMLs
46 xmlDocument doc =
47 -- (`S.evalState` def) $
48 case Seq.viewl doc of
49 sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot ->
50 let (titles, content) = partitionSection sec in
51 case Seq.viewl titles of
52 (unTree -> Cell bt et _) :< _ ->
53 xmlify def
54 { inh_titles = titles
55 , inh_figure = True
56 } contentWithAbout <>
57 xmlify def foot
58 where
59 contentWithAbout =
60 case Seq.findIndexL isAbout content of
61 Nothing -> Tree (Cell bt et $ NodeHeader $ HeaderColon "about" "") mempty <| content
62 Just{} -> content
63 isAbout = \case
64 (unTree -> unCell -> NodeHeader (HeaderColon "about" _wh)) -> True
65 _ -> False
66 _ -> xmlify def doc
67 _ -> xmlify def doc
68
69 partitionSection :: Root -> (Roots, Roots)
70 partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) =
71 case Seq.viewl body of
72 EmptyL -> mempty
73 title@(unTree -> Cell _bt et NodePara) :< rest ->
74 let (subtitles, content) = spanlSubtitles et rest in
75 (title <| (subtitles >>= subTrees), content)
76 where
77 spanlSubtitles ep ts =
78 case Seq.viewl ts of
79 sub@(unTree -> Cell bs es (NodeHeader (HeaderSection lvlSub))) :< rs
80 | lvlSub <= lvlPar
81 , pos_line bs - pos_line ep <= 1 ->
82 let (subs, ts') = spanlSubtitles es rs in
83 (sub <| subs, ts')
84 _ -> (mempty, ts)
85
86 {-
87 -- * Type 'Xmls'
88 type Xmls = S.State State XMLs
89 type Xml = S.State State XML
90 instance Semigroup Xmls where
91 (<>) = liftA2 (<>)
92 instance Monoid Xmls where
93 mempty = return mempty
94 mappend = (<>)
95
96 -- * Type 'State'
97 data State
98 = State
99 { state_pos :: Pos
100 }
101 instance Default State where
102 def = State
103 { state_pos = pos1
104 }
105 -}
106
107 -- * Type 'Inh'
108 data Inh
109 = Inh
110 { inh_figure :: Bool
111 , inh_para :: [Cell () -> XMLs -> XML]
112 , inh_titles :: Roots
113 }
114 instance Default Inh where
115 def = Inh
116 { inh_figure = False
117 , inh_para = List.repeat xmlPara
118 , inh_titles = mempty
119 }
120
121 -- ** 'inh_para'
122 xmlPara :: Cell a -> XMLs -> XML
123 xmlPara c = Tree (XmlElem "para" <$ c)
124
125 xmlTitle :: Cell a -> XMLs -> XML
126 xmlTitle c = Tree (XmlElem "title" <$ c)
127
128 xmlName :: Cell a -> XMLs -> XML
129 xmlName c = Tree (XmlElem "name" <$ c)
130
131 -- * Class 'Xmlify'
132 class Xmlify a where
133 xmlify :: Inh -> a -> XMLs
134 instance Xmlify Roots where
135 xmlify inh roots =
136 case Seq.viewl roots of
137 EmptyL -> mempty
138 r@(Tree cr@(Cell _br _er nr) ts) :< rs ->
139 case nr of
140 ----------------------
141 -- NOTE: HeaderColon becomes parent
142 -- of any continuous following-sibling HeaderBar or HeaderGreat
143 NodeHeader (HeaderColon n _wh)
144 | (span, rest) <- spanlHeaderColon rs
145 , not $ null span ->
146 xmlify inh $ Tree cr (ts<>span) <| rest
147 where
148 spanlHeaderColon :: Roots -> (Roots, Roots)
149 spanlHeaderColon =
150 Seq.spanl $ \case
151 Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n
152 Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n
153 _ -> False
154 ----------------------
155 -- NOTE: gather HeaderBrackets
156 NodeHeader HeaderBrackets{}
157 | (span,rest) <- spanlBrackets roots
158 , not (null span) ->
159 (<| xmlify inh rest) $
160 element "references" $
161 span >>= xmlify inh
162 where
163 spanlBrackets :: Roots -> (Roots, Roots)
164 spanlBrackets =
165 Seq.spanl $ \case
166 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
167 _ -> False
168 ----------------------
169 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
170 NodeText x
171 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
172 xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs'
173 ----------------------
174 -- NOTE: detect [some text](http://some.url) or [SomeRef]
175 NodePair PairParen
176 | Tree (Cell bb eb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
177 (<| xmlify inh rs') $
178 case bracket of
179 (toList -> [unTree -> Cell bl el (NodeToken (TokenLink lnk))]) ->
180 element "eref" $
181 xmlAttrs [Cell bl el ("to",lnk)] <>
182 xmlify inh ts
183 _ ->
184 element "rref" $
185 xmlAttrs [Cell bb eb ("to",Plain.plainDocument bracket)] <>
186 xmlify inh ts
187 ----------------------
188 -- NOTE: gather HeaderDash
189 _ | (span, rest) <- spanlItems (==HeaderDash) roots
190 , not $ null span ->
191 (<| xmlify inh rest) $
192 element "ul" $
193 span >>= xmlify inh{inh_para=List.repeat xmlPara}
194 ----------------------
195 -- NOTE: gather HeaderDot
196 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
197 , not $ null span ->
198 (<| xmlify inh rest) $
199 element "ol" $
200 span >>= xmlify inh{inh_para=List.repeat xmlPara}
201 where
202 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
203 spanlItems liHeader =
204 Seq.spanl $ \(unTree -> unCell -> nod) ->
205 case nod of
206 NodeHeader (HeaderColon "li" _wh) -> True
207 NodeHeader hdr -> liHeader hdr
208 NodePair (PairElem "li" _as) -> True
209 _ -> False
210 ----------------------
211 -- NOTE: context-free Root
212 _ ->
213 xmlify inh r <>
214 xmlify inh rs
215 where
216 element :: XmlName -> XMLs -> XML
217 element n = Tree (XmlElem n <$ cr)
218 {-
219 t@(Tree (NodePair (PairElem))) :< ts ->
220 case inh_para inh of
221 [] -> xmlify inh t <> go inh ts
222 _ | isTokenElem toks -> xmlify inh t <> go inh ts
223 tree0:inh_para ->
224 (case Seq.viewl toks of
225 EmptyL -> id
226 (unTree -> cell_begin -> bp) :< _ -> (tree0 bp (xmlify inh t) <|)) $
227 go inh{inh_para} ts
228 -}
229 instance Xmlify Root where
230 xmlify inh tr@(Tree cel@(Cell bp ep nod) ts) =
231 case nod of
232 NodeGroup -> xmlify inh ts
233 ----------------------
234 NodePara ->
235 case inh_para inh of
236 [] -> xmlify inh ts
237 para:inh_para ->
238 Seq.singleton $
239 para (() <$ cel) $
240 xmlify inh{inh_para} ts
241 ----------------------
242 NodeHeader hdr ->
243 case hdr of
244 --
245 HeaderSection{} ->
246 Seq.singleton $
247 element "section" $ head <> xmlify inh' body
248 where
249 (titles, content) = partitionSection tr
250 (attrs, body) = partitionAttrs content
251 head =
252 case Seq.viewl titles of
253 EmptyL -> mempty
254 title@(unTree -> ct) :< subtitles ->
255 xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>
256 aliases
257 where
258 aliases =
259 subtitles >>= \subtitle@(unTree -> cs) ->
260 return $
261 Tree (cs $> XmlElem "alias") $
262 xmlAttrs (return $ cs $> ("id",getAttrId subtitle))
263 inh' = inh
264 { inh_para = xmlTitle : List.repeat xmlPara
265 , inh_figure = True
266 }
267 --
268 HeaderColon n _wh ->
269 let (attrs,body) = partitionAttrs ts in
270 case n of
271 -- NOTE: insert titles into <about>.
272 "about" ->
273 Seq.singleton $
274 element "about" $
275 (inh_titles inh >>= xmlify inh') <>
276 xmlAttrs attrs <>
277 xmlify inh body
278 -- NOTE: in <figure> mode, unreserved nodes become <figure>
279 _ | inh_figure inh && not (n`List.elem`elems) ->
280 Seq.singleton $
281 element "figure" $
282 -- xmlAttrs (setAttr (Cell ep ep ("type",n)) attrs) <>
283 xmlAttrs (attrs `defaultAttr` Cell bp bp ("type", n)) <>
284 case toList body of
285 [Tree0{}] -> xmlify inh'{inh_para = List.repeat xmlPara} body
286 _ -> xmlify inh'{inh_para = xmlTitle : List.repeat xmlPara} body
287 -- NOTE: reserved nodes
288 _ ->
289 Seq.singleton $
290 element (xmlLocalName n) $
291 xmlAttrs attrs <>
292 xmlify inh' ts
293 where
294 inh' = inh
295 { inh_para =
296 case n of
297 "about" -> xmlTitle : List.repeat xmlPara
298 "reference" -> xmlTitle : List.repeat xmlPara
299 "serie" -> List.repeat xmlName
300 "author" -> List.repeat xmlName
301 "editor" -> List.repeat xmlName
302 "org" -> List.repeat xmlName
303 _ -> []
304 }
305 ----------------------
306 HeaderBar n _wh ->
307 Seq.singleton $
308 element "artwork" $
309 xmlAttrs (Seq.singleton $ Cell bp bp ("type", if TL.null n then "txt" else n)) <>
310 xmlify inh{inh_para=[]} ts
311 ----------------------
312 HeaderGreat n _wh ->
313 Seq.singleton $
314 let (attrs,body) = partitionAttrs ts in
315 element "artwork" $
316 xmlAttrs (attrs `defaultAttr` Cell bp bp ("type", if TL.null n then "quote" else n)) <>
317 xmlify inh{inh_para=[]} body
318 --
319 HeaderEqual n _wh -> Seq.singleton $ element (xmlLocalName n) $ xmlify inh ts
320 --
321 HeaderDot _n -> Seq.singleton $ element "li" $ xmlify inh ts
322 --
323 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
324 --
325 HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $
326 -- debug1_ ("TS", ts) $
327 -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $
328 Plain.plainDocument ts
329 -- Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing
330 {-
331 TreeSeq.mapAlsoNode
332 (cell1 . unCell)
333 (\_k -> fmap $
334 TreeSeq.mapAlsoNode
335 (cell1 . unCell)
336 (\_k' -> cell1 . unCell)) <$> ts
337 -}
338 --
339 HeaderBrackets ident ->
340 let (attrs,body) = partitionAttrs ts in
341 Seq.singleton $
342 element "reference" $
343 xmlAttrs (setAttr (Cell ep ep ("id",ident)) attrs) <>
344 xmlify inh'{inh_para = xmlTitle : xmlTitle : List.repeat xmlPara} body
345 where
346 inh' = inh{inh_figure = False}
347 --
348 HeaderDotSlash p ->
349 Seq.singleton $
350 element "include" $
351 xmlAttrs [cell ("href",TL.pack $ FP.replaceExtension p "dtc")] <>
352 xmlify inh ts
353 ----------------------
354 NodePair pair ->
355 case pair of
356 PairBracket | to <- Plain.plainDocument ts
357 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
358 Seq.singleton $
359 element "rref" $
360 xmlAttrs [cell ("to",to)]
361 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
362 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
363 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
364 PairFrenchquote ->
365 Seq.singleton $
366 element "q" $
367 xmlify inh ts
368 {-
369 case ts of
370 (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) ->
371 case Seq.viewr ls of
372 m :> Tree0 (Cell br er (TokenPlain r)) ->
373 xmlify inh $
374 Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)))
375 <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))))
376 _ ->
377 xmlify inh $
378 Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls
379 (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) ->
380 xmlify inh $
381 rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r)))
382 _ -> xmlify inh ts
383 -}
384 PairHash ->
385 Seq.singleton $
386 element "ref" $
387 xmlAttrs [cell ("to",Plain.plainDocument ts)]
388 PairElem name attrs ->
389 Seq.singleton $
390 element (xmlLocalName name) $
391 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
392 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
393 xmlify inh ts
394 _ ->
395 let (open, close) = pairBorders pair ts in
396 Seq.singleton (Tree0 $ Cell bp bp $ XmlText open) `unionXml`
397 xmlify inh ts `unionXml`
398 Seq.singleton (Tree0 $ Cell ep ep $ XmlText close)
399 ----------------------
400 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
401 ----------------------
402 NodeToken tok ->
403 case tok of
404 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
405 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
406 TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)]
407 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
408 ----------------------
409 NodeLower n as ->
410 Seq.singleton $
411 element "artwork" $
412 xmlify inh ts
413 where
414 cell :: a -> Cell a
415 cell = Cell bp ep
416 element :: XmlName -> XMLs -> XML
417 element n = Tree (cell $ XmlElem n)
418 instance Xmlify (Seq (Cell (XmlName,TL.Text))) where
419 xmlify _inh = xmlAttrs
420
421 -- * Elements
422
423 -- | Reserved elements' name
424 elems :: Set TL.Text
425 elems =
426 [ "about"
427 , "abstract"
428 , "address"
429 , "alias"
430 , "annotation"
431 , "area"
432 , "artwork"
433 , "aside"
434 , "audio"
435 , "author"
436 , "authors"
437 , "bcp14"
438 , "br"
439 , "call"
440 , "city"
441 , "code"
442 , "comment"
443 , "comments"
444 , "country"
445 , "date"
446 , "dd"
447 , "define"
448 , "del"
449 , "div"
450 , "dl"
451 , "document"
452 , "dt"
453 , "editor"
454 , "email"
455 , "embed"
456 , "eref"
457 , "fax"
458 , "feed"
459 , "feedback"
460 , "figure"
461 , "filter"
462 , "format"
463 , "from"
464 , "h"
465 , "hi"
466 , "html5"
467 , "i"
468 , "index"
469 , "iref"
470 , "keyword"
471 , "li"
472 , "link"
473 , "name"
474 , "note"
475 , "ol"
476 , "organization"
477 , "para"
478 , "postamble"
479 , "preamble"
480 , "q"
481 , "ref"
482 , "reference"
483 , "references"
484 , "region"
485 , "rref"
486 , "sc"
487 , "section"
488 , "serie"
489 , "source"
490 , "span"
491 , "street"
492 , "style"
493 , "sub"
494 , "sup"
495 , "table"
496 , "tbody"
497 , "td"
498 , "tel"
499 , "tfoot"
500 , "title"
501 , "th"
502 , "thead"
503 , "toc"
504 , "tof"
505 , "tr"
506 , "tt"
507 , "u"
508 , "ul"
509 , "uri"
510 , "version"
511 , "video"
512 , "workgroup"
513 , "xml"
514 , "zipcode"
515 ]
516
517 -- * Attributes
518
519 -- | Convenient alias, forcing the types
520 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
521 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
522
523 -- | Extract attributes
524 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
525 partitionAttrs ts = (attrs,cs)
526 where
527 (as,cs) = (`Seq.partition` ts) $ \case
528 Tree (unCell -> NodeHeader (HeaderEqual (TL.null -> False) _wh)) _cs -> True
529 _ -> False
530 attrs = attr <$> as
531 attr = \case
532 Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a ->
533 Cell bp ep (xmlLocalName n, v)
534 where
535 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
536 _ -> undefined
537
538 getAttrId :: Root -> TL.Text
539 getAttrId = Plain.plainDocument . Seq.singleton
540
541 setAttr ::
542 Cell (XmlName, TL.Text) ->
543 Seq (Cell (XmlName, TL.Text)) ->
544 Seq (Cell (XmlName, TL.Text))
545 setAttr a@(unCell -> (k, _v)) as =
546 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
547 Just idx -> Seq.update idx a as
548 Nothing -> a <| as
549
550 defaultAttr ::
551 Seq (Cell (XmlName, TL.Text)) ->
552 Cell (XmlName, TL.Text) ->
553 Seq (Cell (XmlName, TL.Text))
554 defaultAttr as a@(unCell -> (k, _v)) =
555 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
556 Just _idx -> as
557 Nothing -> a <| as
558
559 -- * Text
560
561 -- | Unify two 'XMLs', merging border 'XmlText's if any.
562 unionXml :: XMLs -> XMLs -> XMLs
563 unionXml x y =
564 case (Seq.viewr x, Seq.viewl y) of
565 (xs :> x0, y0 :< ys) ->
566 case (x0,y0) of
567 ( Tree0 (Cell bx ex (XmlText tx))
568 , Tree0 (Cell by ey (XmlText ty)) ) ->
569 xs `unionXml`
570 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell bx ex tx <> Cell by ey ty) `unionXml`
571 ys
572 _ -> x <> y
573 (Seq.EmptyR, _) -> y
574 (_, Seq.EmptyL) -> x
575
576 unionsXml :: Foldable f => f XMLs -> XMLs
577 unionsXml = foldl' unionXml mempty