]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Add Html5ify for TCT.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Default.Class (Default(..))
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
21 import Data.Set (Set)
22 import Data.Text (Text)
23 import Data.TreeSeq.Strict (Tree(..))
24 import GHC.Exts (toList)
25 import Prelude (undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text as Text
30 import qualified Data.Text.Lazy as TL
31 import qualified Language.TCT.Write.Plain as Plain
32 import qualified System.FilePath as FP
33
34 import Text.Blaze.XML ()
35 import Language.TCT hiding (Parser)
36 import Language.XML
37 import qualified Data.TreeSeq.Strict as TreeSeq
38
39 -- * Type 'Inh'
40 data Inh
41 = Inh
42 { inh_figure :: Bool
43 , inh_tree0 :: [Pos -> XMLs -> XML]
44 , inh_titles :: Seq Tokens
45 }
46 instance Default Inh where
47 def = Inh
48 { inh_figure = False
49 , inh_tree0 = []
50 , inh_titles = mempty
51 }
52
53 mimetype :: Text -> Maybe Text
54 mimetype "hs" = Just "text/x-haskell"
55 mimetype "sh" = Just "text/x-shellscript"
56 mimetype "shell" = Just "text/x-shellscript"
57 mimetype "shellscript" = Just "text/x-shellscript"
58 mimetype _ = Nothing
59
60 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
61 xmlPhantom n bp = TreeN (Cell bp bp n)
62 xmlPara :: Pos -> XMLs -> XML
63 xmlPara = xmlPhantom "para"
64 xmlTitle :: Pos -> XMLs -> XML
65 xmlTitle = xmlPhantom "title"
66 xmlName :: Pos -> XMLs -> XML
67 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
68 xmlName bp ts = xmlPhantom "name" bp ts
69
70 xmlDocument :: TCTs -> XMLs
71 xmlDocument trees =
72 case Seq.viewl trees of
73 TreeN (unCell -> KeySection{}) vs :< ts ->
74 case spanlTokens vs of
75 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
76 let vs'' =
77 case Seq.findIndexL
78 (\case
79 TreeN (unCell -> KeyColon "about" _) _ -> True
80 _ -> False) vs' of
81 Just{} -> vs'
82 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
83 in
84 xmlTCTs def
85 { inh_titles = titles
86 , inh_figure = True
87 , inh_tree0 = List.repeat xmlPara
88 } vs'' <>
89 xmlTCTs def ts
90 _ -> xmlTCTs def trees
91 _ -> xmlTCTs def trees
92
93 xmlTCTs :: Inh -> TCTs -> XMLs
94 xmlTCTs inh_orig = go inh_orig
95 where
96 go :: Inh -> TCTs -> XMLs
97 go inh trees =
98 case Seq.viewl trees of
99 TreeN (Cell bp ep (KeyBar n _)) _ :< _
100 | (body,ts) <- spanlBar n trees
101 , not (null body) ->
102 (<| go inh ts) $
103 TreeN (Cell bp ep "artwork") $
104 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
105 body >>= xmlTCT inh{inh_tree0=[]}
106
107 TreeN key@(unCell -> KeyColon n _) cs :< ts
108 | (cs',ts') <- spanlKeyColon n ts
109 , not (null cs') ->
110 go inh $ TreeN key (cs<>cs') <| ts'
111
112 TreeN (Cell bp ep KeyBrackets{}) _ :< _
113 | (rl,ts) <- spanlBrackets trees
114 , not (null rl) ->
115 (<| go inh ts) $
116 TreeN (Cell bp ep "references") $
117 rl >>= xmlTCT inh_orig
118
119 _ | (ul,ts) <- spanlItems (==KeyDash) trees
120 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
121 (<| go inh ts) $
122 TreeN (Cell bp ep "ul") $
123 ul >>= xmlTCT inh{inh_tree0=List.repeat xmlPara}
124
125 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
126 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
127 (<| go inh ts) $
128 TreeN (Cell bp ep "ol") $
129 ol >>= xmlTCT inh{inh_tree0=List.repeat xmlPara}
130
131 t@(Tree0 toks) :< ts | isTokenElem toks ->
132 xmlTCT inh_orig t <>
133 go inh ts
134
135 t@(Tree0 toks) :< ts ->
136 case inh_tree0 inh of
137 [] ->
138 xmlTCT inh_orig t <>
139 go inh{inh_tree0=[]} ts
140 x:xs ->
141 case Seq.viewl toks of
142 EmptyL -> go inh{inh_tree0=xs} ts
143 Cell bp _ep _ :< _ ->
144 (<| go inh{inh_tree0=xs} ts) $
145 x bp $
146 xmlTCT inh_orig t
147
148 t:<ts ->
149 xmlTCT inh_orig t <>
150 go inh ts
151
152 _ -> mempty
153
154 xmlTCT :: Inh -> TCT -> XMLs
155 xmlTCT inh tr =
156 case tr of
157 TreeN (Cell bp ep KeySection{}) ts ->
158 let (attrs,body) = partitionAttributesChildren ts in
159 let inh' = inh
160 { inh_tree0 = xmlTitle : List.repeat xmlPara
161 , inh_figure = True
162 } in
163 Seq.singleton $
164 TreeN (Cell bp ep "section") $
165 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
166 xmlTCTs inh' body
167
168 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
169 let (attrs,body) = partitionAttributesChildren ts in
170 let inh' = inh { inh_tree0 =
171 case kn of
172 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
173 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
174 "author" -> List.repeat xmlName
175 "editor" -> List.repeat xmlName
176 _ -> []
177 } in
178 case () of
179 _ | kn == "about" -> xmlAbout inh' key attrs body
180
181 _ | inh_figure inh && not (kn`List.elem`elems) ->
182 Seq.singleton $
183 TreeN (Cell bp ep "figure") $
184 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
185 case toList body of
186 [Tree0{}] -> xmlTCTs inh'{inh_tree0 = List.repeat xmlPara} body
187 _ -> xmlTCTs inh'{inh_tree0 = xmlTitle : List.repeat xmlPara} body
188
189 _ -> Seq.singleton $ xmlKey inh' key attrs body
190
191 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
192
193 Tree0 ts -> xmlTokens ts
194
195 xmlAbout ::
196 Inh ->
197 Cell Key -> Seq (Cell (XmlName, Text)) ->
198 TCTs -> XMLs
199 xmlAbout inh key attrs body =
200 Seq.singleton $
201 xmlKey inh key attrs $
202 case Seq.viewl (inh_titles inh) of
203 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
204 ((<$> inh_titles inh) $ \title ->
205 TreeN (Cell bt bt $ KeyColon "title" "") $
206 Seq.singleton $ Tree0 title)
207 <> body
208 _ -> body
209
210 xmlKey :: Inh -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
211 xmlKey inh (Cell bp ep key) attrs ts =
212 case key of
213 KeyColon n _wh -> d_key n
214 KeyGreat n _wh -> d_key n
215 KeyEqual n _wh -> d_key n
216 KeyBar n _wh -> d_key n
217 KeyDot _n -> TreeN (cell "li") $ xmlTCTs inh ts
218 KeyDash -> TreeN (cell "li") $ xmlTCTs inh ts
219 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
220 where
221 com :: TL.Text
222 com =
223 Plain.text def $
224 TreeSeq.mapAlsoNode
225 (cell1 . unCell)
226 (\_path -> fmap $ cell1 . unCell) <$> ts
227 KeyLower n as -> TreeN (cell "artwork") $ xmlTCTs inh ts
228 KeyBrackets ident ->
229 let inh' = inh{inh_figure = False} in
230 let (attrs',body) = partitionAttributesChildren ts in
231 TreeN (cell "reference") $
232 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) (attrs<>attrs')) <>
233 xmlTCTs inh' body
234 KeyDotSlash p ->
235 TreeN (cell "include") $
236 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
237 xmlTCTs inh ts
238 where
239 cell :: a -> Cell a
240 cell = Cell bp ep
241 d_key :: Text -> XML
242 d_key n =
243 TreeN (cell $ xmlLocalName n) $
244 xmlAttrs attrs <>
245 xmlTCTs inh ts
246
247 xmlTokens :: Tokens -> XMLs
248 xmlTokens tok = goTokens tok
249 where
250 go :: Cell Token -> XMLs
251 go (Cell bp ep tk) =
252 case tk of
253 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
254 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
255 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
256 TokenLink lnk -> Seq.singleton $
257 TreeN (cell "eref") $
258 xmlAttrs [cell ("to",lnk)]
259 TokenPair PairBracket ts | to <- Plain.textify ts
260 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
261 Seq.singleton $
262 TreeN (cell "rref") $
263 xmlAttrs [cell ("to",TL.toStrict to)]
264 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts
265 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts
266 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts
267 TokenPair PairFrenchquote toks@ts ->
268 Seq.singleton $
269 TreeN (cell "q") $
270 case ts of
271 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
272 case Seq.viewr ls of
273 m :> Cell br er (TokenPlain r) ->
274 goTokens $
275 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
276 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
277 _ ->
278 goTokens $
279 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
280 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
281 goTokens $
282 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
283 _ -> goTokens toks
284 TokenPair PairHash to ->
285 Seq.singleton $
286 TreeN (cell "ref") $
287 xmlAttrs [cell ("to",TL.toStrict $ Plain.textify to)]
288 TokenPair (PairElem name attrs) ts ->
289 Seq.singleton $
290 TreeN (cell $ xmlLocalName name) $
291 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
292 goTokens ts
293 TokenPair p ts ->
294 let (o,c) = pairBorders p ts in
295 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
296 goTokens ts `unionXml`
297 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
298 where
299 cell :: a -> Cell a
300 cell = Cell bp ep
301
302 goTokens :: Tokens -> XMLs
303 goTokens toks =
304 case Seq.viewl toks of
305 Cell bp _ep (TokenPair PairParen paren)
306 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
307 :< ts) ->
308 (<| goTokens ts) $
309 case bracket of
310 (toList -> [Cell bl el (TokenLink lnk)]) ->
311 TreeN (Cell bp eb "eref") $
312 xmlAttrs [Cell bl el ("to",lnk)] <>
313 goTokens paren
314 _ ->
315 TreeN (Cell bp eb "rref") $
316 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Plain.textify bracket)] <>
317 goTokens paren
318 t :< ts -> go t `unionXml` goTokens ts
319 Seq.EmptyL -> mempty
320
321 -- | Unify two 'XMLs', merging border 'XmlText's if any.
322 unionXml :: XMLs -> XMLs -> XMLs
323 unionXml x y =
324 case (Seq.viewr x, Seq.viewl y) of
325 (xs :> x0, y0 :< ys) ->
326 case (x0,y0) of
327 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
328 xs `unionXml`
329 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
330 ys
331 _ -> x <> y
332 (Seq.EmptyR, _) -> y
333 (_, Seq.EmptyL) -> x
334
335
336 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
337 spanlBar name = first unKeyBar . spanBar
338 where
339 unKeyBar :: TCTs -> TCTs
340 unKeyBar = (=<<) $ \case
341 TreeN (unCell -> KeyBar{}) ts -> ts
342 _ -> mempty
343 spanBar =
344 Seq.spanl $ \case
345 TreeN (unCell -> KeyBar n _) _ | n == name -> True
346 _ -> False
347
348 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
349 spanlItems liKey ts =
350 let (lis, ts') = spanLIs ts in
351 foldl' accumLIs (mempty,ts') lis
352 where
353 spanLIs = Seq.spanl $ \case
354 TreeN (unCell -> liKey -> True) _ -> True
355 Tree0 toks ->
356 (`any` toks) $ \case
357 (unCell -> TokenPair (PairElem "li" _) _) -> True
358 _ -> False
359 {-
360 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
361 [unCell -> TokenPair (PairElem "li" _) _] -> True
362 _ -> False
363 -}
364 _ -> False
365 accumLIs acc@(oks,kos) t =
366 case t of
367 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
368 Tree0 toks ->
369 let (ok,ko) =
370 (`Seq.spanl` toks) $ \tok ->
371 case unCell tok of
372 TokenPair (PairElem "li" _) _ -> True
373 TokenPlain txt -> Char.isSpace`Text.all`txt
374 _ -> False in
375 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
376 , if null ko then kos else Tree0 ko<|kos )
377 _ -> acc
378 rmTokenPlain =
379 Seq.filter $ \case
380 (unCell -> TokenPlain{}) -> False
381 _ -> True
382
383 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
384 spanlKeyColon name =
385 Seq.spanl $ \case
386 TreeN (unCell -> KeyBar n _) _ -> n == name
387 TreeN (unCell -> KeyGreat n _) _ -> n == name
388 _ -> False
389
390 spanlBrackets :: TCTs -> (TCTs, TCTs)
391 spanlBrackets =
392 Seq.spanl $ \case
393 TreeN (unCell -> KeyBrackets{}) _ -> True
394 _ -> False
395
396 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
397 spanlTokens =
398 first ((\case
399 Tree0 ts -> ts
400 _ -> undefined) <$>) .
401 Seq.spanl (\case
402 Tree0{} -> True
403 _ -> False)
404
405 getAttrId :: TCTs -> Text
406 getAttrId ts =
407 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
408 Just (Tree0 toks) -> TL.toStrict $ Plain.textify toks
409 _ -> ""
410
411 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
412 setXmlAttr a@(unCell -> (k, _v)) as =
413 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
414 Just idx -> Seq.update idx a as
415 Nothing -> a <| as
416
417 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
418 defXmlAttr a@(unCell -> (k, _v)) as =
419 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
420 Just _idx -> as
421 Nothing -> a <| as
422
423 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
424 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
425
426 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
427 partitionAttributesChildren ts = (attrs,cs)
428 where
429 (as,cs) = (`Seq.partition` ts) $ \case
430 TreeN (unCell -> KeyEqual{}) _cs -> True
431 _ -> False
432 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
433 attr = \case
434 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
435 Cell bp ep (xmlLocalName n, v)
436 where
437 v = TL.toStrict $
438 Plain.text def{Plain.state_escape = False} $
439 TreeSeq.mapAlsoNode (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
440 _ -> undefined
441
442 elems :: Set Text
443 elems =
444 [ "about"
445 , "abstract"
446 , "address"
447 , "alias"
448 , "annotation"
449 , "area"
450 , "artwork"
451 , "aside"
452 , "audio"
453 , "author"
454 , "authors"
455 , "bcp14"
456 , "br"
457 , "call"
458 , "city"
459 , "code"
460 , "comment"
461 , "comments"
462 , "country"
463 , "date"
464 , "dd"
465 , "define"
466 , "del"
467 , "div"
468 , "dl"
469 , "document"
470 , "dt"
471 , "editor"
472 , "email"
473 , "embed"
474 , "eref"
475 , "fax"
476 , "feed"
477 , "feedback"
478 , "figure"
479 , "filter"
480 , "format"
481 , "from"
482 , "h"
483 , "hi"
484 , "html5"
485 , "i"
486 , "index"
487 , "iref"
488 , "keyword"
489 , "li"
490 , "link"
491 , "name"
492 , "note"
493 , "ol"
494 , "organization"
495 , "para"
496 , "postamble"
497 , "preamble"
498 , "q"
499 , "ref"
500 , "reference"
501 , "references"
502 , "region"
503 , "rref"
504 , "sc"
505 , "section"
506 , "serie"
507 , "source"
508 , "span"
509 , "street"
510 , "style"
511 , "sub"
512 , "sup"
513 , "table"
514 , "tbody"
515 , "td"
516 , "tel"
517 , "tfoot"
518 , "title"
519 , "th"
520 , "thead"
521 , "toc"
522 , "tof"
523 , "tr"
524 , "tt"
525 , "u"
526 , "ul"
527 , "uri"
528 , "version"
529 , "video"
530 , "workgroup"
531 , "xml"
532 , "zipcode"
533 ]