]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Add DTC HTML5 writing draft.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.TCT.Write.XML where
9
10 import Control.Arrow (first)
11 import Control.Monad (Monad(..), (=<<))
12 import Data.Bool
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Int (Int)
18 import Data.Map.Strict (Map)
19 import Data.Maybe (Maybe(..), maybe)
20 import Data.Monoid (Monoid(..))
21 import Data.Ord (Ord(..))
22 import Data.Semigroup (Semigroup(..))
23 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
24 import Data.Set (Set)
25 import Data.String (IsString(..))
26 import Data.Text (Text)
27 import Data.TreeSeq.Strict (Tree(..))
28 import GHC.Exts (toList)
29 import Prelude (error, undefined)
30 import Text.Show (Show(..), showChar, showString)
31 import qualified Data.Char as Char
32 import qualified Data.List as List
33 import qualified Data.Sequence as Seq
34 import qualified Data.Text as Text
35 import qualified Data.Text.Lazy as TL
36 import qualified Language.TCT.Write.Text as Write
37 import qualified System.FilePath as FP
38
39 import Language.TCT hiding (Parser)
40 import qualified Data.TreeSeq.Strict as TreeSeq
41
42 -- * Type 'XML'
43 type XML = Tree (Cell XmlName) (Cell XmlLeaf)
44 type XMLs = Seq XML
45
46 -- ** Type 'XmlName'
47 data XmlName
48 = XmlName
49 { xmlNamePrefix :: Text
50 , xmlNameSpace :: Text
51 , xmlNameLocal :: Text
52 }
53 instance Show XmlName where
54 showsPrec _p XmlName{xmlNameSpace="", ..} =
55 showString (Text.unpack xmlNameLocal)
56 showsPrec _p XmlName{..} =
57 if Text.null xmlNameSpace
58 then showString (Text.unpack xmlNameLocal)
59 else
60 showChar '{' .
61 showString (Text.unpack xmlNameSpace) .
62 showChar '}' .
63 showString (Text.unpack xmlNameLocal)
64 instance Eq XmlName where
65 XmlName _ sx lx == XmlName _ sy ly = sx == sy && lx == ly
66 instance Ord XmlName where
67 XmlName _ sx lx `compare` XmlName _ sy ly = compare sx sy <> compare lx ly
68 instance IsString XmlName where
69 fromString "" = XmlName "" "" ""
70 fromString full@('{':rest) =
71 case List.break (== '}') rest of
72 (_, "") -> error ("Invalid Clark notation: " <> show full)
73 (ns, local) -> XmlName "" (Text.pack ns) (Text.pack $ List.drop 1 local)
74 fromString local = XmlName "" "" (Text.pack local)
75
76 xmlLocalName :: Text -> XmlName
77 xmlLocalName = XmlName "" ""
78
79 -- ** Type 'XmlLeaf'
80 data XmlLeaf
81 = XmlAttr XmlName Text
82 | XmlComment Text
83 | XmlText Text
84 deriving (Eq,Ord,Show)
85
86 -- ** Type 'XmlPos'
87 data XmlPos
88 = XmlPos
89 { xmlPosAncestors :: [(XmlName,Count)]
90 , xmlPosPrecedingsSiblings :: Map XmlName Count
91 } deriving (Eq,Show)
92 type Count = Int
93
94 -- * Type 'InhXml'
95 data InhXml
96 = InhXml
97 { inhXml_figure :: Bool
98 , inhXml_tree0 :: [Pos -> XMLs -> XML]
99 , inhXml_titles :: Seq Tokens
100 }
101 inhXml :: InhXml
102 inhXml = InhXml
103 { inhXml_figure = False
104 , inhXml_tree0 = []
105 , inhXml_titles = mempty
106 }
107
108 mimetype :: Text -> Maybe Text
109 mimetype "hs" = Just "text/x-haskell"
110 mimetype "sh" = Just "text/x-shellscript"
111 mimetype "shell" = Just "text/x-shellscript"
112 mimetype "shellscript" = Just "text/x-shellscript"
113 mimetype _ = Nothing
114
115 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
116 xmlPhantom n bp = TreeN (Cell bp bp n)
117 xmlPara :: Pos -> XMLs -> XML
118 xmlPara = xmlPhantom "para"
119 xmlTitle :: Pos -> XMLs -> XML
120 xmlTitle = xmlPhantom "title"
121 xmlName :: Pos -> XMLs -> XML
122 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
123 xmlName bp ts = xmlPhantom "name" bp ts
124
125 xmlDocument :: TCTs -> XMLs
126 xmlDocument trees =
127 case Seq.viewl trees of
128 TreeN (unCell -> KeySection{}) vs :< ts ->
129 case spanlTokens vs of
130 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
131 let vs'' =
132 case Seq.findIndexL
133 (\case
134 TreeN (unCell -> KeyColon "about" _) _ -> True
135 _ -> False) vs' of
136 Just{} -> vs'
137 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
138 in
139 xmlTCTs inhXml
140 { inhXml_titles = titles
141 , inhXml_figure = True
142 , inhXml_tree0 = List.repeat xmlPara
143 } vs'' <>
144 xmlTCTs inhXml ts
145 _ -> xmlTCTs inhXml trees
146 _ -> xmlTCTs inhXml trees
147
148 xmlTCTs :: InhXml -> TCTs -> XMLs
149 xmlTCTs inh_orig = go inh_orig
150 where
151 go :: InhXml -> TCTs -> XMLs
152 go inh trees =
153 case Seq.viewl trees of
154 TreeN (Cell bp ep (KeyBar n _)) _ :< _
155 | (body,ts) <- spanlBar n trees
156 , not (null body) ->
157 (<| go inh ts) $
158 TreeN (Cell bp ep "artwork") $
159 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
160 body >>= xmlTCT inh{inhXml_tree0=[]}
161
162 TreeN key@(unCell -> KeyColon n _) cs :< ts
163 | (cs',ts') <- spanlKeyColon n ts
164 , not (null cs') ->
165 go inh $ TreeN key (cs<>cs') <| ts'
166
167 TreeN (Cell bp ep KeyBrackets{}) _ :< _
168 | (rl,ts) <- spanlBrackets trees
169 , not (null rl) ->
170 (<| go inh ts) $
171 TreeN (Cell bp ep "rl") $
172 rl >>= xmlTCT inh_orig
173
174 _ | (ul,ts) <- spanlItems (==KeyDash) trees
175 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
176 (<| go inh ts) $
177 TreeN (Cell bp ep "ul") $
178 ul >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
179
180 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
181 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
182 (<| go inh ts) $
183 TreeN (Cell bp ep "ol") $
184 ol >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
185
186 t@(Tree0 toks) :< ts | isTokenElem toks ->
187 xmlTCT inh_orig t <>
188 go inh ts
189
190 t@(Tree0 toks) :< ts ->
191 case inhXml_tree0 inh of
192 [] ->
193 xmlTCT inh_orig t <>
194 go inh{inhXml_tree0=[]} ts
195 x:xs ->
196 case Seq.viewl toks of
197 EmptyL -> go inh{inhXml_tree0=xs} ts
198 Cell bp _ep _ :< _ ->
199 (<| go inh{inhXml_tree0=xs} ts) $
200 x bp $
201 xmlTCT inh_orig t
202
203 t:<ts ->
204 xmlTCT inh_orig t <>
205 go inh ts
206
207 _ -> mempty
208
209 xmlTCT :: InhXml -> TCT -> XMLs
210 xmlTCT inh tr =
211 case tr of
212 TreeN (Cell bp ep KeySection{}) ts ->
213 let (attrs,body) = partitionAttributesChildren ts in
214 let inh' = inh
215 { inhXml_tree0 = xmlTitle : List.repeat xmlPara
216 , inhXml_figure = True
217 } in
218 Seq.singleton $
219 TreeN (Cell bp ep "section") $
220 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
221 xmlTCTs inh' body
222
223 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
224 let (attrs,body) = partitionAttributesChildren ts in
225 let inh' = inh { inhXml_tree0 =
226 case kn of
227 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
228 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
229 "author" -> List.repeat xmlName
230 _ -> []
231 } in
232 case () of
233 _ | kn == "about" -> xmlAbout inh' key attrs body
234
235 _ | inhXml_figure inh && not (kn`List.elem`elems) ->
236 Seq.singleton $
237 TreeN (Cell bp ep "figure") $
238 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
239 case toList body of
240 [Tree0{}] -> xmlTCTs inh'{inhXml_tree0 = List.repeat xmlPara} body
241 _ -> xmlTCTs inh'{inhXml_tree0 = xmlTitle : List.repeat xmlPara} body
242
243 _ -> Seq.singleton $ xmlKey inh' key attrs body
244
245 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
246
247 Tree0 ts -> xmlTokens ts
248
249 xmlAbout ::
250 InhXml ->
251 Cell Key -> Seq (Cell (XmlName, Text)) ->
252 TCTs -> XMLs
253 xmlAbout inh key attrs body =
254 Seq.singleton $
255 xmlKey inh key attrs $
256 case Seq.viewl (inhXml_titles inh) of
257 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
258 ((<$> inhXml_titles inh) $ \title ->
259 TreeN (Cell bt bt $ KeyColon "title" "") $
260 Seq.singleton $ Tree0 title)
261 <> body
262 _ -> body
263
264 xmlKey :: InhXml -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
265 xmlKey inh (Cell bp ep key) attrs ts =
266 case key of
267 KeyColon n _wh -> d_key n
268 KeyGreat n _wh -> d_key n
269 KeyEqual n _wh -> d_key n
270 KeyBar n _wh -> d_key n
271 KeyDot _n -> TreeN (cell "li") $ xmlTCTs inh ts
272 KeyDash -> TreeN (cell "li") $ xmlTCTs inh ts
273 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
274 where
275 com :: TL.Text
276 com =
277 Write.text Write.config_text $
278 TreeSeq.mapAlsoKey
279 (cell1 . unCell)
280 (\_path -> fmap $ cell1 . unCell) <$> ts
281 KeyLower n as -> TreeN (cell "artwork") $ xmlTCTs inh ts
282 KeyBrackets ident ->
283 let inh' = inh{inhXml_figure = False} in
284 TreeN (cell "reference") $
285 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) attrs) <>
286 xmlTCTs inh' ts
287 KeyDotSlash p ->
288 TreeN (cell "include") $
289 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
290 xmlTCTs inh ts
291 where
292 cell :: a -> Cell a
293 cell = Cell bp ep
294 d_key :: Text -> XML
295 d_key n =
296 TreeN (cell $ xmlLocalName n) $
297 xmlAttrs attrs <>
298 xmlTCTs inh ts
299
300 xmlTokens :: Tokens -> XMLs
301 xmlTokens tok = goTokens tok
302 where
303 go :: Cell Token -> XMLs
304 go (Cell bp ep tk) =
305 case tk of
306 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
307 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
308 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
309 TokenLink lnk -> Seq.singleton $
310 TreeN (cell "eref") $
311 xmlAttrs [cell ("to",lnk)] |>
312 Tree0 (cell $ XmlText lnk)
313 TokenPair PairBracket ts | to <- Write.textTokens ts
314 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
315 Seq.singleton $
316 TreeN (cell "rref") $
317 xmlAttrs [cell ("to",TL.toStrict to)]
318 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts
319 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts
320 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts
321 TokenPair PairFrenchquote toks@ts ->
322 Seq.singleton $
323 TreeN (cell "q") $
324 case ts of
325 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
326 case Seq.viewr ls of
327 m :> Cell br er (TokenPlain r) ->
328 goTokens $
329 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
330 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
331 _ ->
332 goTokens $
333 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
334 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
335 goTokens $
336 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
337 _ -> goTokens toks
338 TokenPair PairHash to ->
339 Seq.singleton $
340 TreeN (cell "ref") $
341 xmlAttrs [cell ("to",TL.toStrict $ Write.textTokens to)]
342 TokenPair (PairElem name attrs) ts ->
343 Seq.singleton $
344 TreeN (cell $ xmlLocalName name) $
345 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
346 goTokens ts
347 TokenPair p ts ->
348 let (o,c) = pairBorders p ts in
349 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
350 goTokens ts `unionXml`
351 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
352 where
353 cell :: a -> Cell a
354 cell = Cell bp ep
355
356 goTokens :: Tokens -> XMLs
357 goTokens toks =
358 case Seq.viewl toks of
359 Cell bp _ep (TokenPair PairParen paren)
360 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
361 :< ts) ->
362 (<| goTokens ts) $
363 case bracket of
364 (toList -> [Cell bl el (TokenLink lnk)]) ->
365 TreeN (Cell bp eb "eref") $
366 xmlAttrs [Cell bl el ("to",lnk)] <>
367 goTokens paren
368 _ ->
369 TreeN (Cell bp eb "rref") $
370 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Write.textTokens bracket)] <>
371 goTokens paren
372 t :< ts -> go t `unionXml` goTokens ts
373 Seq.EmptyL -> mempty
374
375 -- | Unify two 'XMLs', merging border 'XmlText's if any.
376 unionXml :: XMLs -> XMLs -> XMLs
377 unionXml x y =
378 case (Seq.viewr x, Seq.viewl y) of
379 (xs :> x0, y0 :< ys) ->
380 case (x0,y0) of
381 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
382 xs `unionXml`
383 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
384 ys
385 _ -> x <> y
386 (Seq.EmptyR, _) -> y
387 (_, Seq.EmptyL) -> x
388
389
390 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
391 spanlBar name = first unKeyBar . spanBar
392 where
393 unKeyBar :: TCTs -> TCTs
394 unKeyBar = (=<<) $ \case
395 TreeN (unCell -> KeyBar{}) ts -> ts
396 _ -> mempty
397 spanBar =
398 Seq.spanl $ \case
399 TreeN (unCell -> KeyBar n _) _ | n == name -> True
400 _ -> False
401
402 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
403 spanlItems liKey ts =
404 let (lis, ts') = spanLIs ts in
405 foldl' accumLIs (mempty,ts') lis
406 where
407 spanLIs = Seq.spanl $ \case
408 TreeN (unCell -> liKey -> True) _ -> True
409 Tree0 toks ->
410 (`any` toks) $ \case
411 (unCell -> TokenPair (PairElem "li" _) _) -> True
412 _ -> False
413 {-
414 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
415 [unCell -> TokenPair (PairElem "li" _) _] -> True
416 _ -> False
417 -}
418 _ -> False
419 accumLIs acc@(oks,kos) t =
420 case t of
421 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
422 Tree0 toks ->
423 let (ok,ko) =
424 (`Seq.spanl` toks) $ \tok ->
425 case unCell tok of
426 TokenPair (PairElem "li" _) _ -> True
427 TokenPlain txt -> Char.isSpace`Text.all`txt
428 _ -> False in
429 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
430 , if null ko then kos else Tree0 ko<|kos )
431 _ -> acc
432 rmTokenPlain =
433 Seq.filter $ \case
434 (unCell -> TokenPlain{}) -> False
435 _ -> True
436
437 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
438 spanlKeyColon name =
439 Seq.spanl $ \case
440 TreeN (unCell -> KeyBar n _) _ -> n == name
441 TreeN (unCell -> KeyGreat n _) _ -> n == name
442 _ -> False
443
444 spanlBrackets :: TCTs -> (TCTs, TCTs)
445 spanlBrackets =
446 Seq.spanl $ \case
447 TreeN (unCell -> KeyBrackets{}) _ -> True
448 _ -> False
449
450 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
451 spanlTokens =
452 first ((\case
453 Tree0 ts -> ts
454 _ -> undefined) <$>) .
455 Seq.spanl (\case
456 Tree0{} -> True
457 _ -> False)
458
459 getAttrId :: TCTs -> Text
460 getAttrId ts =
461 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
462 Just (Tree0 toks) -> TL.toStrict $ Write.textTokens toks
463 _ -> ""
464
465 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
466 setXmlAttr a@(unCell -> (k, _v)) as =
467 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
468 Just idx -> Seq.update idx a as
469 Nothing -> a <| as
470
471 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
472 defXmlAttr a@(unCell -> (k, _v)) as =
473 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
474 Just _idx -> as
475 Nothing -> a <| as
476
477 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
478 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
479
480 {-
481 xmlAttr :: XmlAttrs -> (Text,Attr) -> XmlAttrs
482 xmlAttr acc (_,Attr{..}) = Map.insert (xmlLocalName attr_name) attr_value acc
483 -- TODO: conflict
484 -}
485
486 {-
487 d_Attributes :: XmlAttrs -> DTC -> DTC
488 d_Attributes = flip $ Map.foldrWithKey $ \n v ->
489 B.AddCustomAttribute (B.Text n) (B.Text v)
490 -}
491
492 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
493 partitionAttributesChildren ts = (attrs,cs)
494 where
495 (as,cs) = (`Seq.partition` ts) $ \case
496 TreeN (unCell -> KeyEqual{}) _cs -> True
497 _ -> False
498 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
499 attr = \case
500 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
501 Cell bp ep (xmlLocalName n, v)
502 where
503 v = TL.toStrict $
504 Write.text Write.config_text{Write.config_text_escape = False} $
505 TreeSeq.mapAlsoKey (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
506 _ -> undefined
507
508 elems :: Set Text
509 elems =
510 [ "about"
511 , "abstract"
512 , "address"
513 , "alias"
514 , "annotation"
515 , "area"
516 , "artwork"
517 , "aside"
518 , "audio"
519 , "author"
520 , "authors"
521 , "bcp14"
522 , "br"
523 , "call"
524 , "city"
525 , "code"
526 , "comment"
527 , "comments"
528 , "country"
529 , "date"
530 , "dd"
531 , "define"
532 , "del"
533 , "div"
534 , "dl"
535 , "document"
536 , "dt"
537 , "editor"
538 , "email"
539 , "embed"
540 , "eref"
541 , "fax"
542 , "feed"
543 , "feedback"
544 , "figure"
545 , "filter"
546 , "format"
547 , "from"
548 , "h"
549 , "hi"
550 , "html5"
551 , "i"
552 , "index"
553 , "iref"
554 , "keyword"
555 , "li"
556 , "link"
557 , "name"
558 , "note"
559 , "ol"
560 , "organization"
561 , "para"
562 , "postamble"
563 , "preamble"
564 , "q"
565 , "ref"
566 , "reference"
567 , "region"
568 , "rl"
569 , "rref"
570 , "sc"
571 , "section"
572 , "serie"
573 , "source"
574 , "span"
575 , "street"
576 , "style"
577 , "sub"
578 , "sup"
579 , "table"
580 , "tbody"
581 , "td"
582 , "tel"
583 , "tfoot"
584 , "title"
585 , "th"
586 , "thead"
587 , "toc"
588 , "tof"
589 , "tr"
590 , "tt"
591 , "ul"
592 , "uri"
593 , "version"
594 , "video"
595 , "workgroup"
596 , "xml"
597 , "zipcode"
598 ]