]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Fix <figure/> and <section/>.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.TCT.Write.XML where
9
10 import Control.Arrow (first)
11 import Control.Monad (Monad(..), (=<<))
12 import Data.Bool
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
22 import Data.Set (Set)
23 import Data.String (IsString(..))
24 import Data.Text (Text)
25 import Data.TreeSeq.Strict (Tree(..))
26 import GHC.Exts (toList)
27 import Prelude (error, undefined)
28 import Text.Show (Show(..), showChar, showString)
29 import qualified Data.Char as Char
30 import qualified Data.List as List
31 import qualified Data.Sequence as Seq
32 import qualified Data.Text as Text
33 import qualified Data.Text.Lazy as TL
34 import qualified Language.TCT.Write.Text as Write
35 import qualified System.FilePath as FP
36
37 import Language.TCT hiding (Parser)
38 import qualified Data.TreeSeq.Strict as TreeSeq
39
40
41 -- * Type 'XML'
42 type XML = Tree (Cell XmlName) (Cell XmlLeaf)
43 type XMLs = Seq XML
44
45 -- ** Type 'XmlName'
46 data XmlName
47 = XmlName
48 { xmlNamePrefix :: Text
49 , xmlNameSpace :: Text
50 , xmlNameLocal :: Text
51 }
52 instance Show XmlName where
53 showsPrec _p XmlName{xmlNameSpace="", ..} =
54 showString (Text.unpack xmlNameLocal)
55 showsPrec _p XmlName{..} =
56 if Text.null xmlNameSpace
57 then showString (Text.unpack xmlNameLocal)
58 else
59 showChar '{' .
60 showString (Text.unpack xmlNameSpace) .
61 showChar '}' .
62 showString (Text.unpack xmlNameLocal)
63 instance Eq XmlName where
64 XmlName _ sx lx == XmlName _ sy ly = sx == sy && lx == ly
65 instance Ord XmlName where
66 XmlName _ sx lx `compare` XmlName _ sy ly = compare sx sy <> compare lx ly
67 instance IsString XmlName where
68 fromString "" = XmlName "" "" ""
69 fromString full@('{':rest) =
70 case List.break (== '}') rest of
71 (_, "") -> error ("Invalid Clark notation: " <> show full)
72 (ns, local) -> XmlName "" (Text.pack ns) (Text.pack $ List.drop 1 local)
73 fromString local = XmlName "" "" (Text.pack local)
74
75 xmlLocalName :: Text -> XmlName
76 xmlLocalName = XmlName "" ""
77
78 -- ** Type 'XmlLeaf'
79 data XmlLeaf
80 = XmlAttr XmlName Text
81 | XmlComment Text
82 | XmlText Text
83 deriving (Eq,Ord,Show)
84
85 -- * Type 'InhXml'
86 data InhXml
87 = InhXml
88 { inhXml_figure :: Bool
89 , inhXml_tree0 :: [Pos -> XMLs -> XML]
90 , inhXml_titles :: Seq Tokens
91 }
92 inhXml :: InhXml
93 inhXml = InhXml
94 { inhXml_figure = False
95 , inhXml_tree0 = []
96 , inhXml_titles = mempty
97 }
98
99 mimetype :: Text -> Maybe Text
100 mimetype "hs" = Just "text/x-haskell"
101 mimetype "sh" = Just "text/x-shellscript"
102 mimetype "shell" = Just "text/x-shellscript"
103 mimetype "shellscript" = Just "text/x-shellscript"
104 mimetype _ = Nothing
105
106 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
107 xmlPhantom n bp = TreeN (Cell bp bp n)
108 xmlPara :: Pos -> XMLs -> XML
109 xmlPara = xmlPhantom "para"
110 xmlTitle :: Pos -> XMLs -> XML
111 xmlTitle = xmlPhantom "title"
112 xmlName :: Pos -> XMLs -> XML
113 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
114 xmlName bp ts = xmlPhantom "name" bp ts
115
116 xmlDocument :: TCTs -> XMLs
117 xmlDocument trees =
118 case Seq.viewl trees of
119 TreeN (unCell -> KeySection{}) vs :< ts ->
120 case spanlTokens vs of
121 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
122 let vs'' =
123 case Seq.findIndexL
124 (\case
125 TreeN (unCell -> KeyColon "about" _) _ -> True
126 _ -> False) vs' of
127 Just{} -> vs'
128 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
129 in
130 xmlTCTs inhXml
131 { inhXml_titles = titles
132 , inhXml_figure = True
133 , inhXml_tree0 = List.repeat xmlPara
134 } vs'' <>
135 xmlTCTs inhXml ts
136 _ -> xmlTCTs inhXml trees
137 _ -> xmlTCTs inhXml trees
138
139 xmlTCTs :: InhXml -> TCTs -> XMLs
140 xmlTCTs inh_orig = go inh_orig
141 where
142 go :: InhXml -> TCTs -> XMLs
143 go inh trees =
144 case Seq.viewl trees of
145 TreeN (Cell bp ep (KeyBar n _)) _ :< _
146 | (body,ts) <- spanlBar n trees
147 , not (null body) ->
148 (<| go inh ts) $
149 TreeN (Cell bp ep "artwork") $
150 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
151 body >>= xmlTCT inh{inhXml_tree0=[]}
152
153 TreeN key@(unCell -> KeyColon n _) cs :< ts
154 | (cs',ts') <- spanlKeyColon n ts
155 , not (null cs') ->
156 go inh $ TreeN key (cs<>cs') <| ts'
157
158 TreeN (Cell bp ep KeyBrackets{}) _ :< _
159 | (rl,ts) <- spanlBrackets trees
160 , not (null rl) ->
161 (<| go inh ts) $
162 TreeN (Cell bp ep "rl") $
163 rl >>= xmlTCT inh_orig
164
165 _ | (ul,ts) <- spanlItems (==KeyDash) trees
166 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
167 (<| go inh ts) $
168 TreeN (Cell bp ep "ul") $
169 ul >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
170
171 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
172 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
173 (<| go inh ts) $
174 TreeN (Cell bp ep "ol") $
175 ol >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
176
177 t@(Tree0 toks) :< ts | isTokenElem toks ->
178 xmlTCT inh_orig t <>
179 go inh ts
180
181 t@(Tree0 toks) :< ts ->
182 case inhXml_tree0 inh of
183 [] ->
184 xmlTCT inh_orig t <>
185 go inh{inhXml_tree0=[]} ts
186 x:xs ->
187 case Seq.viewl toks of
188 EmptyL -> go inh{inhXml_tree0=xs} ts
189 Cell bp _ep _ :< _ ->
190 (<| go inh{inhXml_tree0=xs} ts) $
191 x bp $
192 xmlTCT inh_orig t
193
194 t:<ts ->
195 xmlTCT inh_orig t <>
196 go inh ts
197
198 _ -> mempty
199
200 xmlTCT :: InhXml -> TCT -> XMLs
201 xmlTCT inh tr =
202 case tr of
203 TreeN (Cell bp ep KeySection{}) ts ->
204 let (attrs,body) = partitionAttributesChildren ts in
205 let inh' = inh
206 { inhXml_tree0 = xmlTitle : List.repeat xmlPara
207 , inhXml_figure = True
208 } in
209 Seq.singleton $
210 TreeN (Cell bp ep "section") $
211 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
212 xmlTCTs inh' body
213
214 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
215 let (attrs,body) = partitionAttributesChildren ts in
216 let inh' = inh { inhXml_tree0 =
217 case kn of
218 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
219 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
220 "author" -> List.repeat xmlName
221 _ -> []
222 } in
223 case () of
224 _ | kn == "about" -> xmlAbout inh' key attrs body
225
226 _ | inhXml_figure inh && not (kn`List.elem`elems) ->
227 Seq.singleton $
228 TreeN (Cell bp ep "figure") $
229 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
230 case toList body of
231 [Tree0{}] -> xmlTCTs inh'{inhXml_tree0 = List.repeat xmlPara} body
232 _ -> xmlTCTs inh'{inhXml_tree0 = xmlTitle : List.repeat xmlPara} body
233
234 _ -> Seq.singleton $ xmlKey inh' key attrs body
235
236 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
237
238 Tree0 ts -> xmlTokens ts
239
240 xmlAbout ::
241 InhXml ->
242 Cell Key -> Seq (Cell (XmlName, Text)) ->
243 TCTs -> XMLs
244 xmlAbout inh key attrs body =
245 Seq.singleton $
246 xmlKey inh key attrs $
247 case Seq.viewl (inhXml_titles inh) of
248 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
249 ((<$> inhXml_titles inh) $ \title ->
250 TreeN (Cell bt bt $ KeyColon "title" "") $
251 Seq.singleton $ Tree0 title)
252 <> body
253 _ -> body
254
255 xmlKey :: InhXml -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
256 xmlKey inh (Cell bp ep key) attrs ts =
257 case key of
258 KeyColon n _wh -> d_key n
259 KeyGreat n _wh -> d_key n
260 KeyEqual n _wh -> d_key n
261 KeyBar n _wh -> d_key n
262 KeyDot _n -> TreeN (cell "li") $ xmlTCTs inh ts
263 KeyDash -> TreeN (cell "li") $ xmlTCTs inh ts
264 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
265 where
266 com :: TL.Text
267 com =
268 Write.text Write.config_text $
269 TreeSeq.mapAlsoKey
270 (cell1 . unCell)
271 (\_path -> fmap $ cell1 . unCell) <$> ts
272 KeyLower n as -> TreeN (cell "artwork") $ xmlTCTs inh ts
273 KeyBrackets ident ->
274 let inh' = inh{inhXml_figure = False} in
275 TreeN (cell "reference") $
276 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) attrs) <>
277 xmlTCTs inh' ts
278 KeyDotSlash p ->
279 TreeN (cell "include") $
280 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
281 xmlTCTs inh ts
282 where
283 cell :: a -> Cell a
284 cell = Cell bp ep
285 d_key :: Text -> XML
286 d_key n =
287 TreeN (cell $ xmlLocalName n) $
288 xmlAttrs attrs <>
289 xmlTCTs inh ts
290
291 xmlTokens :: Tokens -> XMLs
292 xmlTokens tok = goTokens tok
293 where
294 go :: Cell Token -> XMLs
295 go (Cell bp ep tk) =
296 case tk of
297 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
298 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
299 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
300 TokenLink lnk -> Seq.singleton $
301 TreeN (cell "eref") $
302 xmlAttrs [cell ("to",lnk)] |>
303 Tree0 (cell $ XmlText lnk)
304 TokenPair PairBracket ts | to <- Write.t_Tokens ts
305 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
306 Seq.singleton $
307 TreeN (cell "rref") $
308 xmlAttrs [cell ("to",TL.toStrict to)]
309 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts
310 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts
311 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts
312 TokenPair PairFrenchquote toks@ts ->
313 Seq.singleton $
314 TreeN (cell "q") $
315 case ts of
316 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
317 case Seq.viewr ls of
318 m :> Cell br er (TokenPlain r) ->
319 goTokens $
320 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
321 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
322 _ ->
323 goTokens $
324 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
325 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
326 goTokens $
327 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
328 _ -> goTokens toks
329 TokenPair PairHash (toList -> [unCell -> TokenPlain t]) ->
330 Seq.singleton $
331 TreeN (cell "ref") $
332 xmlAttrs [cell ("to",t)]
333 TokenPair (PairElem name attrs) ts ->
334 Seq.singleton $
335 TreeN (cell $ xmlLocalName name) $
336 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
337 goTokens ts
338 TokenPair p ts ->
339 let (o,c) = pairBorders p ts in
340 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
341 goTokens ts `unionXml`
342 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
343 where
344 cell :: a -> Cell a
345 cell = Cell bp ep
346
347 goTokens :: Tokens -> XMLs
348 goTokens toks =
349 case Seq.viewl toks of
350 Cell bp _ep (TokenPair PairParen paren)
351 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
352 :< ts) ->
353 (<| goTokens ts) $
354 case bracket of
355 (toList -> [Cell bl el (TokenLink lnk)]) ->
356 TreeN (Cell bp eb "eref") $
357 xmlAttrs [Cell bl el ("to",lnk)] <>
358 goTokens paren
359 _ ->
360 TreeN (Cell bp eb "rref") $
361 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Write.t_Tokens bracket)] <>
362 goTokens paren
363 t :< ts -> go t `unionXml` goTokens ts
364 Seq.EmptyL -> mempty
365
366 -- | Unify two 'XMLs', merging border 'XmlText's if any.
367 unionXml :: XMLs -> XMLs -> XMLs
368 unionXml x y =
369 case (Seq.viewr x, Seq.viewl y) of
370 (xs :> x0, y0 :< ys) ->
371 case (x0,y0) of
372 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
373 xs `unionXml`
374 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
375 ys
376 _ -> x <> y
377 (Seq.EmptyR, _) -> y
378 (_, Seq.EmptyL) -> x
379
380
381 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
382 spanlBar name = first unKeyBar . spanBar
383 where
384 unKeyBar :: TCTs -> TCTs
385 unKeyBar = (=<<) $ \case
386 TreeN (unCell -> KeyBar{}) ts -> ts
387 _ -> mempty
388 spanBar =
389 Seq.spanl $ \case
390 TreeN (unCell -> KeyBar n _) _ | n == name -> True
391 _ -> False
392
393 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
394 spanlItems liKey ts =
395 let (lis, ts') = spanLIs ts in
396 foldl' accumLIs (mempty,ts') lis
397 where
398 spanLIs = Seq.spanl $ \case
399 TreeN (unCell -> liKey -> True) _ -> True
400 Tree0 toks ->
401 (`any` toks) $ \case
402 (unCell -> TokenPair (PairElem "li" _) _) -> True
403 _ -> False
404 {-
405 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
406 [unCell -> TokenPair (PairElem "li" _) _] -> True
407 _ -> False
408 -}
409 _ -> False
410 accumLIs acc@(oks,kos) t =
411 case t of
412 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
413 Tree0 toks ->
414 let (ok,ko) =
415 (`Seq.spanl` toks) $ \tok ->
416 case unCell tok of
417 TokenPair (PairElem "li" _) _ -> True
418 TokenPlain txt -> Char.isSpace`Text.all`txt
419 _ -> False in
420 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
421 , if null ko then kos else Tree0 ko<|kos )
422 _ -> acc
423 rmTokenPlain =
424 Seq.filter $ \case
425 (unCell -> TokenPlain{}) -> False
426 _ -> True
427
428 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
429 spanlKeyColon name =
430 Seq.spanl $ \case
431 TreeN (unCell -> KeyBar n _) _ -> n == name
432 TreeN (unCell -> KeyGreat n _) _ -> n == name
433 _ -> False
434
435 spanlBrackets :: TCTs -> (TCTs, TCTs)
436 spanlBrackets =
437 Seq.spanl $ \case
438 TreeN (unCell -> KeyBrackets{}) _ -> True
439 _ -> False
440
441 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
442 spanlTokens =
443 first ((\case
444 Tree0 ts -> ts
445 _ -> undefined) <$>) .
446 Seq.spanl (\case
447 Tree0{} -> True
448 _ -> False)
449
450 getAttrId :: TCTs -> Text
451 getAttrId ts =
452 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
453 Just (Tree0 toks) -> TL.toStrict $ Write.t_Tokens toks
454 _ -> ""
455
456 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
457 setXmlAttr a@(unCell -> (k, _v)) as =
458 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
459 Just idx -> Seq.update idx a as
460 Nothing -> a <| as
461
462 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
463 defXmlAttr a@(unCell -> (k, _v)) as =
464 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
465 Just _idx -> as
466 Nothing -> a <| as
467
468 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
469 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
470
471 {-
472 xmlAttr :: XmlAttrs -> (Text,Attr) -> XmlAttrs
473 xmlAttr acc (_,Attr{..}) = Map.insert (xmlLocalName attr_name) attr_value acc
474 -- TODO: conflict
475 -}
476
477 {-
478 d_Attributes :: XmlAttrs -> DTC -> DTC
479 d_Attributes = flip $ Map.foldrWithKey $ \n v ->
480 B.AddCustomAttribute (B.Text n) (B.Text v)
481 -}
482
483 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
484 partitionAttributesChildren ts = (attrs,cs)
485 where
486 (as,cs) = (`Seq.partition` ts) $ \case
487 TreeN (unCell -> KeyEqual{}) _cs -> True
488 _ -> False
489 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
490 attr = \case
491 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
492 Cell bp ep (xmlLocalName n, v)
493 where
494 v = TL.toStrict $
495 Write.text Write.config_text{Write.config_text_escape = False} $
496 TreeSeq.mapAlsoKey (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
497 _ -> undefined
498
499 elems :: Set Text
500 elems =
501 [ "about"
502 , "abstract"
503 , "address"
504 , "alias"
505 , "annotation"
506 , "area"
507 , "artwork"
508 , "aside"
509 , "audio"
510 , "author"
511 , "authors"
512 , "bcp14"
513 , "br"
514 , "call"
515 , "city"
516 , "code"
517 , "comment"
518 , "comments"
519 , "country"
520 , "date"
521 , "dd"
522 , "define"
523 , "del"
524 , "div"
525 , "dl"
526 , "document"
527 , "dt"
528 , "editor"
529 , "email"
530 , "embed"
531 , "eref"
532 , "fax"
533 , "feed"
534 , "feedback"
535 , "figure"
536 , "filter"
537 , "format"
538 , "from"
539 , "h"
540 , "hi"
541 , "html5"
542 , "i"
543 , "index"
544 , "iref"
545 , "keyword"
546 , "li"
547 , "link"
548 , "name"
549 , "note"
550 , "ol"
551 , "organization"
552 , "para"
553 , "postamble"
554 , "preamble"
555 , "q"
556 , "ref"
557 , "reference"
558 , "region"
559 , "rl"
560 , "rref"
561 , "sc"
562 , "section"
563 , "serie"
564 , "source"
565 , "span"
566 , "street"
567 , "style"
568 , "sub"
569 , "sup"
570 , "table"
571 , "tbody"
572 , "td"
573 , "tel"
574 , "tfoot"
575 , "title"
576 , "th"
577 , "thead"
578 , "toc"
579 , "tof"
580 , "tr"
581 , "tt"
582 , "ul"
583 , "uri"
584 , "version"
585 , "video"
586 , "workgroup"
587 , "xml"
588 , "zipcode"
589 ]