]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Add DTC HTML5 writing draft.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.TCT.Write.XML where
9
10 import Control.Arrow (first)
11 import Control.Monad (Monad(..), (=<<))
12 import Data.Bool
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
22 import Data.Set (Set)
23 import Data.String (IsString(..))
24 import Data.Text (Text)
25 import Data.TreeSeq.Strict (Tree(..))
26 import GHC.Exts (toList)
27 import Prelude (error, undefined)
28 import Text.Show (Show(..), showChar, showString)
29 import qualified Data.Char as Char
30 import qualified Data.List as List
31 import qualified Data.Sequence as Seq
32 import qualified Data.Text as Text
33 import qualified Data.Text.Lazy as TL
34 import qualified Language.TCT.Write.Text as Write
35 import qualified System.FilePath as FP
36
37 import Language.TCT hiding (Parser)
38 import qualified Data.TreeSeq.Strict as TreeSeq
39
40 -- * Type 'XML'
41 type XML = Tree (Cell XmlName) (Cell XmlLeaf)
42 type XMLs = Seq XML
43
44 -- ** Type 'XmlName'
45 data XmlName
46 = XmlName
47 { xmlNamePrefix :: Text
48 , xmlNameSpace :: Text
49 , xmlNameLocal :: Text
50 }
51 instance Show XmlName where
52 showsPrec _p XmlName{xmlNameSpace="", ..} =
53 showString (Text.unpack xmlNameLocal)
54 showsPrec _p XmlName{..} =
55 if Text.null xmlNameSpace
56 then showString (Text.unpack xmlNameLocal)
57 else
58 showChar '{' .
59 showString (Text.unpack xmlNameSpace) .
60 showChar '}' .
61 showString (Text.unpack xmlNameLocal)
62 instance Eq XmlName where
63 XmlName _ sx lx == XmlName _ sy ly = sx == sy && lx == ly
64 instance Ord XmlName where
65 XmlName _ sx lx `compare` XmlName _ sy ly = compare sx sy <> compare lx ly
66 instance IsString XmlName where
67 fromString "" = XmlName "" "" ""
68 fromString full@('{':rest) =
69 case List.break (== '}') rest of
70 (_, "") -> error ("Invalid Clark notation: " <> show full)
71 (ns, local) -> XmlName "" (Text.pack ns) (Text.pack $ List.drop 1 local)
72 fromString local = XmlName "" "" (Text.pack local)
73
74 xmlLocalName :: Text -> XmlName
75 xmlLocalName = XmlName "" ""
76
77 -- ** Type 'XmlLeaf'
78 data XmlLeaf
79 = XmlAttr XmlName Text
80 | XmlComment Text
81 | XmlText Text
82 deriving (Eq,Ord,Show)
83
84 -- * Type 'InhXml'
85 data InhXml
86 = InhXml
87 { inhXml_figure :: Bool
88 , inhXml_tree0 :: [Pos -> XMLs -> XML]
89 , inhXml_titles :: Seq Tokens
90 }
91 inhXml :: InhXml
92 inhXml = InhXml
93 { inhXml_figure = False
94 , inhXml_tree0 = []
95 , inhXml_titles = mempty
96 }
97
98 mimetype :: Text -> Maybe Text
99 mimetype "hs" = Just "text/x-haskell"
100 mimetype "sh" = Just "text/x-shellscript"
101 mimetype "shell" = Just "text/x-shellscript"
102 mimetype "shellscript" = Just "text/x-shellscript"
103 mimetype _ = Nothing
104
105 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
106 xmlPhantom n bp = TreeN (Cell bp bp n)
107 xmlPara :: Pos -> XMLs -> XML
108 xmlPara = xmlPhantom "para"
109 xmlTitle :: Pos -> XMLs -> XML
110 xmlTitle = xmlPhantom "title"
111 xmlName :: Pos -> XMLs -> XML
112 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
113 xmlName bp ts = xmlPhantom "name" bp ts
114
115 xmlDocument :: TCTs -> XMLs
116 xmlDocument trees =
117 case Seq.viewl trees of
118 TreeN (unCell -> KeySection{}) vs :< ts ->
119 case spanlTokens vs of
120 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
121 let vs'' =
122 case Seq.findIndexL
123 (\case
124 TreeN (unCell -> KeyColon "about" _) _ -> True
125 _ -> False) vs' of
126 Just{} -> vs'
127 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
128 in
129 xmlTCTs inhXml
130 { inhXml_titles = titles
131 , inhXml_figure = True
132 , inhXml_tree0 = List.repeat xmlPara
133 } vs'' <>
134 xmlTCTs inhXml ts
135 _ -> xmlTCTs inhXml trees
136 _ -> xmlTCTs inhXml trees
137
138 xmlTCTs :: InhXml -> TCTs -> XMLs
139 xmlTCTs inh_orig = go inh_orig
140 where
141 go :: InhXml -> TCTs -> XMLs
142 go inh trees =
143 case Seq.viewl trees of
144 TreeN (Cell bp ep (KeyBar n _)) _ :< _
145 | (body,ts) <- spanlBar n trees
146 , not (null body) ->
147 (<| go inh ts) $
148 TreeN (Cell bp ep "artwork") $
149 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
150 body >>= xmlTCT inh{inhXml_tree0=[]}
151
152 TreeN key@(unCell -> KeyColon n _) cs :< ts
153 | (cs',ts') <- spanlKeyColon n ts
154 , not (null cs') ->
155 go inh $ TreeN key (cs<>cs') <| ts'
156
157 TreeN (Cell bp ep KeyBrackets{}) _ :< _
158 | (rl,ts) <- spanlBrackets trees
159 , not (null rl) ->
160 (<| go inh ts) $
161 TreeN (Cell bp ep "rl") $
162 rl >>= xmlTCT inh_orig
163
164 _ | (ul,ts) <- spanlItems (==KeyDash) trees
165 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
166 (<| go inh ts) $
167 TreeN (Cell bp ep "ul") $
168 ul >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
169
170 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
171 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
172 (<| go inh ts) $
173 TreeN (Cell bp ep "ol") $
174 ol >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
175
176 t@(Tree0 toks) :< ts | isTokenElem toks ->
177 xmlTCT inh_orig t <>
178 go inh ts
179
180 t@(Tree0 toks) :< ts ->
181 case inhXml_tree0 inh of
182 [] ->
183 xmlTCT inh_orig t <>
184 go inh{inhXml_tree0=[]} ts
185 x:xs ->
186 case Seq.viewl toks of
187 EmptyL -> go inh{inhXml_tree0=xs} ts
188 Cell bp _ep _ :< _ ->
189 (<| go inh{inhXml_tree0=xs} ts) $
190 x bp $
191 xmlTCT inh_orig t
192
193 t:<ts ->
194 xmlTCT inh_orig t <>
195 go inh ts
196
197 _ -> mempty
198
199 xmlTCT :: InhXml -> TCT -> XMLs
200 xmlTCT inh tr =
201 case tr of
202 TreeN (Cell bp ep KeySection{}) ts ->
203 let (attrs,body) = partitionAttributesChildren ts in
204 let inh' = inh
205 { inhXml_tree0 = xmlTitle : List.repeat xmlPara
206 , inhXml_figure = True
207 } in
208 Seq.singleton $
209 TreeN (Cell bp ep "section") $
210 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
211 xmlTCTs inh' body
212
213 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
214 let (attrs,body) = partitionAttributesChildren ts in
215 let inh' = inh { inhXml_tree0 =
216 case kn of
217 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
218 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
219 "author" -> List.repeat xmlName
220 _ -> []
221 } in
222 case () of
223 _ | kn == "about" -> xmlAbout inh' key attrs body
224
225 _ | inhXml_figure inh && not (kn`List.elem`elems) ->
226 Seq.singleton $
227 TreeN (Cell bp ep "figure") $
228 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
229 case toList body of
230 [Tree0{}] -> xmlTCTs inh'{inhXml_tree0 = List.repeat xmlPara} body
231 _ -> xmlTCTs inh'{inhXml_tree0 = xmlTitle : List.repeat xmlPara} body
232
233 _ -> Seq.singleton $ xmlKey inh' key attrs body
234
235 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
236
237 Tree0 ts -> xmlTokens ts
238
239 xmlAbout ::
240 InhXml ->
241 Cell Key -> Seq (Cell (XmlName, Text)) ->
242 TCTs -> XMLs
243 xmlAbout inh key attrs body =
244 Seq.singleton $
245 xmlKey inh key attrs $
246 case Seq.viewl (inhXml_titles inh) of
247 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
248 ((<$> inhXml_titles inh) $ \title ->
249 TreeN (Cell bt bt $ KeyColon "title" "") $
250 Seq.singleton $ Tree0 title)
251 <> body
252 _ -> body
253
254 xmlKey :: InhXml -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
255 xmlKey inh (Cell bp ep key) attrs ts =
256 case key of
257 KeyColon n _wh -> d_key n
258 KeyGreat n _wh -> d_key n
259 KeyEqual n _wh -> d_key n
260 KeyBar n _wh -> d_key n
261 KeyDot _n -> TreeN (cell "li") $ xmlTCTs inh ts
262 KeyDash -> TreeN (cell "li") $ xmlTCTs inh ts
263 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
264 where
265 com :: TL.Text
266 com =
267 Write.text Write.config_text $
268 TreeSeq.mapAlsoKey
269 (cell1 . unCell)
270 (\_path -> fmap $ cell1 . unCell) <$> ts
271 KeyLower n as -> TreeN (cell "artwork") $ xmlTCTs inh ts
272 KeyBrackets ident ->
273 let inh' = inh{inhXml_figure = False} in
274 TreeN (cell "reference") $
275 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) attrs) <>
276 xmlTCTs inh' ts
277 KeyDotSlash p ->
278 TreeN (cell "include") $
279 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
280 xmlTCTs inh ts
281 where
282 cell :: a -> Cell a
283 cell = Cell bp ep
284 d_key :: Text -> XML
285 d_key n =
286 TreeN (cell $ xmlLocalName n) $
287 xmlAttrs attrs <>
288 xmlTCTs inh ts
289
290 xmlTokens :: Tokens -> XMLs
291 xmlTokens tok = goTokens tok
292 where
293 go :: Cell Token -> XMLs
294 go (Cell bp ep tk) =
295 case tk of
296 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
297 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
298 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
299 TokenLink lnk -> Seq.singleton $
300 TreeN (cell "eref") $
301 xmlAttrs [cell ("to",lnk)] |>
302 Tree0 (cell $ XmlText lnk)
303 TokenPair PairBracket ts | to <- Write.textTokens ts
304 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
305 Seq.singleton $
306 TreeN (cell "rref") $
307 xmlAttrs [cell ("to",TL.toStrict to)]
308 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts
309 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts
310 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts
311 TokenPair PairFrenchquote toks@ts ->
312 Seq.singleton $
313 TreeN (cell "q") $
314 case ts of
315 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
316 case Seq.viewr ls of
317 m :> Cell br er (TokenPlain r) ->
318 goTokens $
319 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
320 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
321 _ ->
322 goTokens $
323 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
324 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
325 goTokens $
326 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
327 _ -> goTokens toks
328 TokenPair PairHash (toList -> [unCell -> TokenPlain t]) ->
329 Seq.singleton $
330 TreeN (cell "ref") $
331 xmlAttrs [cell ("to",t)]
332 TokenPair (PairElem name attrs) ts ->
333 Seq.singleton $
334 TreeN (cell $ xmlLocalName name) $
335 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
336 goTokens ts
337 TokenPair p ts ->
338 let (o,c) = pairBorders p ts in
339 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
340 goTokens ts `unionXml`
341 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
342 where
343 cell :: a -> Cell a
344 cell = Cell bp ep
345
346 goTokens :: Tokens -> XMLs
347 goTokens toks =
348 case Seq.viewl toks of
349 Cell bp _ep (TokenPair PairParen paren)
350 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
351 :< ts) ->
352 (<| goTokens ts) $
353 case bracket of
354 (toList -> [Cell bl el (TokenLink lnk)]) ->
355 TreeN (Cell bp eb "eref") $
356 xmlAttrs [Cell bl el ("to",lnk)] <>
357 goTokens paren
358 _ ->
359 TreeN (Cell bp eb "rref") $
360 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Write.textTokens bracket)] <>
361 goTokens paren
362 t :< ts -> go t `unionXml` goTokens ts
363 Seq.EmptyL -> mempty
364
365 -- | Unify two 'XMLs', merging border 'XmlText's if any.
366 unionXml :: XMLs -> XMLs -> XMLs
367 unionXml x y =
368 case (Seq.viewr x, Seq.viewl y) of
369 (xs :> x0, y0 :< ys) ->
370 case (x0,y0) of
371 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
372 xs `unionXml`
373 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
374 ys
375 _ -> x <> y
376 (Seq.EmptyR, _) -> y
377 (_, Seq.EmptyL) -> x
378
379
380 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
381 spanlBar name = first unKeyBar . spanBar
382 where
383 unKeyBar :: TCTs -> TCTs
384 unKeyBar = (=<<) $ \case
385 TreeN (unCell -> KeyBar{}) ts -> ts
386 _ -> mempty
387 spanBar =
388 Seq.spanl $ \case
389 TreeN (unCell -> KeyBar n _) _ | n == name -> True
390 _ -> False
391
392 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
393 spanlItems liKey ts =
394 let (lis, ts') = spanLIs ts in
395 foldl' accumLIs (mempty,ts') lis
396 where
397 spanLIs = Seq.spanl $ \case
398 TreeN (unCell -> liKey -> True) _ -> True
399 Tree0 toks ->
400 (`any` toks) $ \case
401 (unCell -> TokenPair (PairElem "li" _) _) -> True
402 _ -> False
403 {-
404 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
405 [unCell -> TokenPair (PairElem "li" _) _] -> True
406 _ -> False
407 -}
408 _ -> False
409 accumLIs acc@(oks,kos) t =
410 case t of
411 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
412 Tree0 toks ->
413 let (ok,ko) =
414 (`Seq.spanl` toks) $ \tok ->
415 case unCell tok of
416 TokenPair (PairElem "li" _) _ -> True
417 TokenPlain txt -> Char.isSpace`Text.all`txt
418 _ -> False in
419 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
420 , if null ko then kos else Tree0 ko<|kos )
421 _ -> acc
422 rmTokenPlain =
423 Seq.filter $ \case
424 (unCell -> TokenPlain{}) -> False
425 _ -> True
426
427 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
428 spanlKeyColon name =
429 Seq.spanl $ \case
430 TreeN (unCell -> KeyBar n _) _ -> n == name
431 TreeN (unCell -> KeyGreat n _) _ -> n == name
432 _ -> False
433
434 spanlBrackets :: TCTs -> (TCTs, TCTs)
435 spanlBrackets =
436 Seq.spanl $ \case
437 TreeN (unCell -> KeyBrackets{}) _ -> True
438 _ -> False
439
440 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
441 spanlTokens =
442 first ((\case
443 Tree0 ts -> ts
444 _ -> undefined) <$>) .
445 Seq.spanl (\case
446 Tree0{} -> True
447 _ -> False)
448
449 getAttrId :: TCTs -> Text
450 getAttrId ts =
451 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
452 Just (Tree0 toks) -> TL.toStrict $ Write.textTokens toks
453 _ -> ""
454
455 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
456 setXmlAttr a@(unCell -> (k, _v)) as =
457 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
458 Just idx -> Seq.update idx a as
459 Nothing -> a <| as
460
461 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
462 defXmlAttr a@(unCell -> (k, _v)) as =
463 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
464 Just _idx -> as
465 Nothing -> a <| as
466
467 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
468 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
469
470 {-
471 xmlAttr :: XmlAttrs -> (Text,Attr) -> XmlAttrs
472 xmlAttr acc (_,Attr{..}) = Map.insert (xmlLocalName attr_name) attr_value acc
473 -- TODO: conflict
474 -}
475
476 {-
477 d_Attributes :: XmlAttrs -> DTC -> DTC
478 d_Attributes = flip $ Map.foldrWithKey $ \n v ->
479 B.AddCustomAttribute (B.Text n) (B.Text v)
480 -}
481
482 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
483 partitionAttributesChildren ts = (attrs,cs)
484 where
485 (as,cs) = (`Seq.partition` ts) $ \case
486 TreeN (unCell -> KeyEqual{}) _cs -> True
487 _ -> False
488 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
489 attr = \case
490 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
491 Cell bp ep (xmlLocalName n, v)
492 where
493 v = TL.toStrict $
494 Write.text Write.config_text{Write.config_text_escape = False} $
495 TreeSeq.mapAlsoKey (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
496 _ -> undefined
497
498 elems :: Set Text
499 elems =
500 [ "about"
501 , "abstract"
502 , "address"
503 , "alias"
504 , "annotation"
505 , "area"
506 , "artwork"
507 , "aside"
508 , "audio"
509 , "author"
510 , "authors"
511 , "bcp14"
512 , "br"
513 , "call"
514 , "city"
515 , "code"
516 , "comment"
517 , "comments"
518 , "country"
519 , "date"
520 , "dd"
521 , "define"
522 , "del"
523 , "div"
524 , "dl"
525 , "document"
526 , "dt"
527 , "editor"
528 , "email"
529 , "embed"
530 , "eref"
531 , "fax"
532 , "feed"
533 , "feedback"
534 , "figure"
535 , "filter"
536 , "format"
537 , "from"
538 , "h"
539 , "hi"
540 , "html5"
541 , "i"
542 , "index"
543 , "iref"
544 , "keyword"
545 , "li"
546 , "link"
547 , "name"
548 , "note"
549 , "ol"
550 , "organization"
551 , "para"
552 , "postamble"
553 , "preamble"
554 , "q"
555 , "ref"
556 , "reference"
557 , "region"
558 , "rl"
559 , "rref"
560 , "sc"
561 , "section"
562 , "serie"
563 , "source"
564 , "span"
565 , "street"
566 , "style"
567 , "sub"
568 , "sup"
569 , "table"
570 , "tbody"
571 , "td"
572 , "tel"
573 , "tfoot"
574 , "title"
575 , "th"
576 , "thead"
577 , "toc"
578 , "tof"
579 , "tr"
580 , "tt"
581 , "ul"
582 , "uri"
583 , "version"
584 , "video"
585 , "workgroup"
586 , "xml"
587 , "zipcode"
588 ]