]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Maintain Plain and HTML5 rendering of TCT.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Default.Class (Default(..))
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
21 import Data.Set (Set)
22 import Data.Text (Text)
23 import Data.TreeSeq.Strict (Tree(..))
24 import GHC.Exts (toList)
25 import Prelude (undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text as Text
30 import qualified Data.Text.Lazy as TL
31 import qualified Control.Monad.Trans.State as S
32 import qualified Language.TCT.Write.Plain as Plain
33 import qualified System.FilePath as FP
34
35 import Text.Blaze.XML ()
36 import Language.TCT hiding (Parser)
37 import Language.XML
38 import qualified Data.TreeSeq.Strict as TreeSeq
39
40 import Debug.Trace (trace)
41 import Text.Show (show)
42
43 xmlDocument :: Roots -> XMLs
44 xmlDocument trees =
45 -- (`S.evalState` def) $
46 case Seq.viewl trees of
47 Tree (unCell -> NodeHeader HeaderSection{}) vs :< ts ->
48 case spanlTokens vs of
49 (titles@(Seq.viewl -> (Seq.viewl -> (posTree -> bp) :< _) :< _), vs') ->
50 let vs'' =
51 case Seq.findIndexL
52 (\case
53 Tree (unCell -> NodeHeader (HeaderColon "about" _)) _ -> True
54 _ -> False) vs' of
55 Nothing -> Tree (Cell bp bp $ NodeHeader $ HeaderColon "about" "") mempty <| vs'
56 Just{} -> vs' in
57 xmlify def
58 { inh_titles = titles
59 , inh_figure = True
60 , inh_tree0 = List.repeat xmlPara
61 } vs'' <>
62 xmlify def ts
63 _ -> xmlify def trees
64 _ -> xmlify def trees
65
66 {-
67 -- * Type 'Xmls'
68 type Xmls = S.State State XMLs
69 type Xml = S.State State XML
70 instance Semigroup Xmls where
71 (<>) = liftA2 (<>)
72 instance Monoid Xmls where
73 mempty = return mempty
74 mappend = (<>)
75
76 -- * Type 'State'
77 data State
78 = State
79 { state_pos :: Pos
80 }
81 instance Default State where
82 def = State
83 { state_pos = pos1
84 }
85 -}
86
87 -- * Type 'Inh'
88 data Inh
89 = Inh
90 { inh_figure :: Bool
91 , inh_tree0 :: [Pos -> XMLs -> XML]
92 , inh_titles :: Seq Tokens
93 }
94 instance Default Inh where
95 def = Inh
96 { inh_figure = False
97 , inh_tree0 = []
98 , inh_titles = mempty
99 }
100
101 -- * Class 'Xmlify'
102 class Xmlify a where
103 xmlify :: Inh -> a -> XMLs
104 instance Xmlify Roots where
105 xmlify inh_orig = go inh_orig
106 where
107 go :: Inh -> Roots -> XMLs
108 go inh trees =
109 case Seq.viewl trees of
110 Tree (Cell bp ep (NodeHeader (HeaderBar n _))) _ :< _
111 | (body,ts) <- spanlBar n trees
112 , not (null body) ->
113 (<| go inh ts) $
114 Tree (Cell bp ep "artwork") $
115 maybe id (\v -> (Tree0 (XmlAttr (Cell bp ep ("type", v))) <|)) (mimetype n) $
116 body >>= xmlify inh{inh_tree0=[]}
117
118 Tree nod@(unCell -> NodeHeader (HeaderColon n _)) cs :< ts
119 | (cs',ts') <- spanlHeaderColon n ts
120 , not (null cs') ->
121 go inh $ Tree nod (cs<>cs') <| ts'
122
123 Tree (Cell bp ep (NodeHeader HeaderBrackets{})) _ :< _
124 | (rl,ts) <- spanlBrackets trees
125 , not (null rl) ->
126 (<| go inh ts) $
127 Tree (Cell bp ep "references") $
128 rl >>= xmlify inh_orig
129
130 _ | (ul,ts) <- spanlItems (==HeaderDash) trees
131 , Tree (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
132 (<| go inh ts) $
133 Tree (Cell bp ep "ul") $
134 ul >>= xmlify inh{inh_tree0=List.repeat xmlPara}
135
136 _ | (ol,ts) <- spanlItems (\case HeaderDot{} -> True; _ -> False) trees
137 , Tree (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
138 (<| go inh ts) $
139 Tree (Cell bp ep "ol") $
140 ol >>= xmlify inh{inh_tree0=List.repeat xmlPara}
141
142 t@(Tree0 toks) :< ts ->
143 case inh_tree0 inh of
144 [] -> xmlify inh_orig t <> go inh ts
145 _ | isTokenElem toks -> xmlify inh_orig t <> go inh ts
146 tree0:inh_tree0 ->
147 (case Seq.viewl toks of
148 EmptyL -> id
149 (posTree -> bp) :< _ -> (tree0 bp (xmlify inh_orig t) <|)) $
150 go inh{inh_tree0} ts
151
152 t:<ts ->
153 xmlify inh_orig t <>
154 go inh ts
155
156 _ -> mempty
157 instance Xmlify Root where
158 xmlify inh (Tree (Cell bp ep nod) ts) =
159 case nod of
160 NodeHeader hdr ->
161 case hdr of
162 HeaderSection{} ->
163 let (attrs,body) = partitionAttributesChildren ts in
164 let inh' = inh
165 { inh_tree0 = xmlTitle : List.repeat xmlPara
166 , inh_figure = True
167 } in
168 Seq.singleton $
169 Tree (Cell bp ep "section") $
170 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
171 xmlify inh' body
172 HeaderColon kn _wh ->
173 let (attrs,body) = partitionAttributesChildren ts in
174 let inh' = inh { inh_tree0 =
175 case kn of
176 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
177 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
178 "serie" -> List.repeat xmlName
179 "author" -> List.repeat xmlName
180 "editor" -> List.repeat xmlName
181 "org" -> List.repeat xmlName
182 _ -> []
183 } in
184 case () of
185 _ | kn == "about" -> xmlAbout inh' nod attrs body
186 _ | inh_figure inh && not (kn`List.elem`elems) ->
187 Seq.singleton $
188 Tree (Cell bp ep "figure") $
189 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
190 case toList body of
191 [Tree0{}] -> xmlify inh'{inh_tree0 = List.repeat xmlPara} body
192 _ -> xmlify inh'{inh_tree0 = xmlTitle : List.repeat xmlPara} body
193 _ -> Seq.singleton $ x_Header inh' n
194 HeaderGreat n _wh -> x_Header inh' n
195 HeaderEqual n _wh -> x_Header inh' n
196 HeaderBar n _wh -> x_Header inh' n
197 HeaderDot _n -> Tree (cell "li") $ xmlify inh ts
198 HeaderDash -> Tree (cell "li") $ xmlify inh ts
199 HeaderDashDash -> Tree0 $ XmlComment $ cell $
200 -- debug1_ ("TS", ts) $
201 -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $
202 Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing
203 {-
204 TreeSeq.mapAlsoNode
205 (cell1 . unCell)
206 (\_k -> fmap $
207 TreeSeq.mapAlsoNode
208 (cell1 . unCell)
209 (\_k' -> cell1 . unCell)) <$> ts
210 -}
211 HeaderLower n as -> Tree (cell "artwork") $ xmlify inh ts
212 HeaderBrackets ident ->
213 let inh' = inh{inh_figure = False} in
214 let (attrs',body) = partitionAttributesChildren ts in
215 Tree (cell "reference") $
216 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) (attrs<>attrs')) <>
217 xmlify inh'{inh_tree0 = xmlTitle : xmlTitle : List.repeat xmlPara} body
218 HeaderDotSlash p ->
219 Tree (cell "include") $
220 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
221 xmlify inh ts
222 NodePair pair ->
223 case pair of
224 PairBracket | to <- Plain.text def ts
225 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
226 Seq.singleton .
227 Tree (cell "rref") $
228 xmlAttrs [cell ("to",TL.toStrict to)]
229 PairStar -> Seq.singleton . Tree (cell "b") $ xmlify inh ts
230 PairSlash -> Seq.singleton . Tree (cell "i") $ xmlify inh ts
231 PairBackquote -> Seq.singleton . Tree (cell "code") $ xmlify inh ts
232 PairFrenchquote ->
233 Seq.singleton .
234 Tree (cell "q") $
235 xmlify inh ts
236 {-
237 case ts of
238 (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) ->
239 case Seq.viewr ls of
240 m :> Tree0 (Cell br er (TokenPlain r)) ->
241 xmlify inh $
242 Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)))
243 <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))))
244 _ ->
245 xmlify inh $
246 Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls
247 (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) ->
248 xmlify inh $
249 rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r)))
250 _ -> xmlify inh ts
251 -}
252 PairHash ->
253 Seq.singleton .
254 Tree (cell "ref") $
255 xmlAttrs [cell ("to",TL.toStrict $ Plain.text def ts)]
256 PairElem name attrs ->
257 Seq.singleton .
258 Tree (cell $ xmlLocalName name) $
259 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
260 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
261 xmlify inh ts
262 _ ->
263 let (o,c) = pairBorders p ts in
264 Seq.singleton (Tree0 $ XmlPhrases $ phrasify $ Cell bp bp o) `unionXml`
265 xmlify inh ts `unionXml`
266 Seq.singleton (Tree0 $ XmlPhrases $ phrasify $ Cell ep ep c)
267 NodeToken tok ->
268 case tok of
269 TokenEscape c -> Seq.singleton $ Tree0 $ XmlPhrases $ phrasify c
270 TokenText t -> Seq.singleton $ Tree0 $ XmlText t
271 TokenTag t -> Seq.singleton $ Tree (cell "ref") $ xmlAttrs [cell ("to",t)]
272 TokenLink lnk -> Seq.singleton $ Tree (cell "eref") $ xmlAttrs [cell ("to",lnk)]
273 where
274 cell :: a -> Cell a
275 cell = Cell bp ep
276 x_Header :: Inh -> Text -> XML
277 x_Header inh' n =
278 Tree (cell $ xmlLocalName n) $
279 xmlAttrs attrs <>
280 xmlify inh' ts
281
282
283
284 instance Xmlify Tokens where
285 xmlify inh toks =
286 case Seq.viewl toks of
287 Tree (Cell bp _ep (NodePair PairParen)) paren
288 :< (Seq.viewl -> Tree (Cell bb eb (NodePair PairBracket)) bracket
289 :< ts) ->
290 (<| xmlify inh ts) $
291 case bracket of
292 (toList -> [Tree0 (Cell bl el (TokenLink lnk))]) ->
293 Tree (Cell bp eb "eref") $
294 xmlAttrs [Cell bl el ("to",lnk)] <>
295 xmlify inh paren
296 _ ->
297 Tree (Cell bp eb "rref") $
298 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Plain.text def bracket)] <>
299 xmlify inh paren
300 t :< ts -> xmlify inh t `unionXml` xmlify inh ts
301 Seq.EmptyL -> mempty
302 {-
303 instance Xmlify Token where
304 xmlify inh (Tree (Cell bp ep (NodePair p)) ts) =
305 xmlify inh (Tree0 tok) = do
306 where
307 cell :: a -> Cell a
308 cell = Cell bp ep
309 {-
310 whites :: Pos -> Pos -> Seq XmlText
311 whites (Pos bLine bCol) (Pos eLine eCol) =
312 case bLine`compate`eLine of
313 LT -> verts <>
314 EQ -> horiz bCol eCol
315 GT ->
316 -}
317 instance Xmlify (Cell Phrase) where
318 xmlify _inh t = Seq.singleton $ Tree0 $ XmlPhrases $ Seq.singleton t
319 -}
320
321 mimetype :: Text -> Maybe Text
322 mimetype "hs" = Just "text/x-haskell"
323 mimetype "sh" = Just "text/x-shellscript"
324 mimetype "shell" = Just "text/x-shellscript"
325 mimetype "shellscript" = Just "text/x-shellscript"
326 mimetype _ = Nothing
327
328 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
329 xmlPhantom n bp = Tree (Cell bp bp n)
330 xmlPara :: Pos -> XMLs -> XML
331 xmlPara = xmlPhantom "para"
332 xmlTitle :: Pos -> XMLs -> XML
333 xmlTitle = xmlPhantom "title"
334 xmlName :: Pos -> XMLs -> XML
335 xmlName bp (toList -> [Tree0 (XmlText (unCell -> t))]) = Tree0 (XmlAttr $ Cell bp bp ("name", t))
336 xmlName bp ts = xmlPhantom "name" bp ts
337
338 xmlAbout ::
339 Inh ->
340 Cell Header -> Seq (Cell (XmlName, Text)) ->
341 Roots -> XMLs
342 xmlAbout inh hdr attrs body =
343 Seq.singleton $
344 xmlHeader inh hdr attrs $
345 case Seq.viewl (inh_titles inh) of
346 (Seq.viewl -> (posTree -> bt) :< _) :< _ ->
347 ((<$> inh_titles inh) $ \title ->
348 Tree (Cell bt bt $ NodeHeader $ HeaderColon "title" "") $
349 Seq.singleton $ Tree0 title)
350 <> body
351 _ -> body
352
353 xmlHeader :: Inh -> Cell Header -> Seq (Cell (XmlName, Text)) -> Roots -> XML
354 xmlHeader inh (Cell bp ep hdr) attrs ts =
355
356 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
357 xmlAttrs = (Tree0 . XmlAttr <$>)
358
359 -- | Unify two 'XMLs', merging border 'XmlText's if any.
360 unionXml :: XMLs -> XMLs -> XMLs
361 unionXml x y =
362 case (Seq.viewr x, Seq.viewl y) of
363 (xs :> x0, y0 :< ys) ->
364 case (x0,y0) of
365 ( Tree0 (XmlPhrases tx)
366 , Tree0 (XmlPhrases ty) ) ->
367 xs `unionXml`
368 Seq.singleton (Tree0 $ XmlPhrases $ tx <> ty) `unionXml`
369 ys
370 ( Tree0 (XmlText tx)
371 , Tree0 (XmlText ty) ) ->
372 xs `unionXml`
373 Seq.singleton (Tree0 $ XmlText $ tx <> ty) `unionXml`
374 ys
375 _ -> x <> y
376 (Seq.EmptyR, _) -> y
377 (_, Seq.EmptyL) -> x
378
379 spanlBar :: Name -> Roots -> (Roots, Roots)
380 spanlBar name = first unHeaderBar . spanBar
381 where
382 unHeaderBar :: Roots -> Roots
383 unHeaderBar = (=<<) $ \case
384 Tree (unCell -> NodeHeader HeaderBar{}) ts -> ts
385 _ -> mempty
386 spanBar =
387 Seq.spanl $ \case
388 Tree (unCell -> NodeHeader (HeaderBar n _)) _ | n == name -> True
389 _ -> False
390
391 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
392 spanlItems liHeader ts =
393 let (lis, ts') = spanLIs ts in
394 foldl' accumLIs (mempty,ts') lis
395 where
396 spanLIs :: Roots -> (Roots, Roots)
397 spanLIs = Seq.spanl $ \case
398 Tree (unCell -> NodeHeader (liHeader -> True)) _ -> True
399 Tree (NodeToken toks) _ ->
400 (`any` toks) $ \case
401 TreeN (unCell -> NodePair (PairElem "li" _)) _ -> True
402 _ -> False
403 {-
404 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
405 [unCell -> TokenPair (PairElem "li" _) _] -> True
406 _ -> False
407 -}
408 _ -> False
409 accumLIs :: (Roots,Roots) -> Root -> (Roots,Roots)
410 accumLIs acc@(oks,kos) t =
411 case t of
412 Tree (unCell -> NodeHeader (liHeader -> True)) _ -> (oks|>t,kos)
413 Tree0 toks ->
414 let (ok,ko) =
415 (`Seq.spanl` toks) $ \case
416 Tree (unCell -> NodePair (PairElem "li" _)) _ -> True
417 -- (isTokenWhite -> True) -> True -- TODO: see if this is still useful
418 _ -> False in
419 ( if null ok then oks else oks|>Tree0 (rmTokenWhite ok)
420 , if null ko then kos else Tree0 ko<|kos )
421 _ -> acc
422 {-
423 rmTokenWhite :: Tokens -> Tokens
424 rmTokenWhite =
425 Seq.filter $ \case
426 (isTokenWhite -> False) -> True
427 _ -> True
428 -}
429
430 spanlHeaderColon :: Name -> Roots -> (Roots, Roots)
431 spanlHeaderColon name =
432 Seq.spanl $ \case
433 Tree (unCell -> NodeHeader (HeaderBar n _)) _ -> n == name
434 Tree (unCell -> NodeHeader (HeaderGreat n _)) _ -> n == name
435 _ -> False
436
437 spanlBrackets :: Roots -> (Roots, Roots)
438 spanlBrackets =
439 Seq.spanl $ \case
440 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
441 _ -> False
442
443 spanlTokens :: Roots -> (Seq Tokens, Roots)
444 spanlTokens =
445 first ((\case
446 Tree0 ts -> ts
447 _ -> undefined) <$>) .
448 Seq.spanl (\case
449 Tree0{} -> True
450 _ -> False)
451
452 getAttrId :: Roots -> Text
453 getAttrId ts =
454 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
455 Just (Tree0 toks) -> TL.toStrict $ Plain.text def toks
456 _ -> ""
457
458 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
459 setXmlAttr a@(unCell -> (k, _v)) as =
460 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
461 Just idx -> Seq.update idx a as
462 Nothing -> a <| as
463
464 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
465 defXmlAttr a@(unCell -> (k, _v)) as =
466 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
467 Just _idx -> as
468 Nothing -> a <| as
469
470 partitionAttributesChildren :: Roots -> (Seq (Cell (XmlName, Text)), Roots)
471 partitionAttributesChildren ts = (attrs,cs)
472 where
473 (as,cs) = (`Seq.partition` ts) $ \case
474 Tree (unCell -> NodeHeader HeaderEqual{}) _cs -> True
475 _ -> False
476 attrs = attr <$> as
477 attr = \case
478 Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a ->
479 Cell bp ep (xmlLocalName n, v)
480 where
481 v = TL.toStrict $
482 Plain.text def{Plain.state_escape = False} $
483 TreeSeq.mapAlsoNode
484 (cell1 . unCell)
485 (\_k -> fmap $
486 TreeSeq.mapAlsoNode
487 (cell1 . unCell)
488 (\_k' -> cell1 . unCell)) <$> a
489 _ -> undefined
490
491 elems :: Set Text
492 elems =
493 [ "about"
494 , "abstract"
495 , "address"
496 , "alias"
497 , "annotation"
498 , "area"
499 , "artwork"
500 , "aside"
501 , "audio"
502 , "author"
503 , "authors"
504 , "bcp14"
505 , "br"
506 , "call"
507 , "city"
508 , "code"
509 , "comment"
510 , "comments"
511 , "country"
512 , "date"
513 , "dd"
514 , "define"
515 , "del"
516 , "div"
517 , "dl"
518 , "document"
519 , "dt"
520 , "editor"
521 , "email"
522 , "embed"
523 , "eref"
524 , "fax"
525 , "feed"
526 , "feedback"
527 , "figure"
528 , "filter"
529 , "format"
530 , "from"
531 , "h"
532 , "hi"
533 , "html5"
534 , "i"
535 , "index"
536 , "iref"
537 , "keyword"
538 , "li"
539 , "link"
540 , "name"
541 , "note"
542 , "ol"
543 , "organization"
544 , "para"
545 , "postamble"
546 , "preamble"
547 , "q"
548 , "ref"
549 , "reference"
550 , "references"
551 , "region"
552 , "rref"
553 , "sc"
554 , "section"
555 , "serie"
556 , "source"
557 , "span"
558 , "street"
559 , "style"
560 , "sub"
561 , "sup"
562 , "table"
563 , "tbody"
564 , "td"
565 , "tel"
566 , "tfoot"
567 , "title"
568 , "th"
569 , "thead"
570 , "toc"
571 , "tof"
572 , "tr"
573 , "tt"
574 , "u"
575 , "ul"
576 , "uri"
577 , "version"
578 , "video"
579 , "workgroup"
580 , "xml"
581 , "zipcode"
582 ]