]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Split TCT -> DTC parsing into TCT -> XML -> DTC.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.TCT.Write.XML where
9
10 import Control.Arrow (first)
11 import Control.Monad (Monad(..), (=<<))
12 import Data.Bool
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
22 import Data.Set (Set)
23 import Data.String (IsString(..))
24 import Data.Text (Text)
25 import Data.TreeSeq.Strict (Tree(..))
26 import GHC.Exts (toList)
27 import Prelude (error, undefined)
28 import Text.Show (Show(..), showChar, showString)
29 import qualified Data.Char as Char
30 import qualified Data.List as List
31 import qualified Data.Sequence as Seq
32 import qualified Data.Text as Text
33 import qualified Data.Text.Lazy as TL
34 import qualified Language.TCT.Write.Text as Write
35 import qualified System.FilePath as FP
36
37 import Language.TCT hiding (Parser)
38 import qualified Data.TreeSeq.Strict as TreeSeq
39
40
41 -- * Type 'XML'
42 type XML = Tree (Cell XmlName) (Cell XmlLeaf)
43 type XMLs = Seq XML
44
45 -- ** Type 'XmlName'
46 data XmlName
47 = XmlName
48 { xmlNamePrefix :: Text
49 , xmlNameSpace :: Text
50 , xmlNameLocal :: Text
51 }
52 instance Show XmlName where
53 showsPrec _p XmlName{xmlNameSpace="", ..} =
54 showString (Text.unpack xmlNameLocal)
55 showsPrec _p XmlName{..} =
56 if Text.null xmlNameSpace
57 then showString (Text.unpack xmlNameLocal)
58 else
59 showChar '{' .
60 showString (Text.unpack xmlNameSpace) .
61 showChar '}' .
62 showString (Text.unpack xmlNameLocal)
63 instance Eq XmlName where
64 XmlName _ sx lx == XmlName _ sy ly = sx == sy && lx == ly
65 instance Ord XmlName where
66 XmlName _ sx lx `compare` XmlName _ sy ly = compare sx sy <> compare lx ly
67 instance IsString XmlName where
68 fromString "" = XmlName "" "" ""
69 fromString full@('{':rest) =
70 case List.break (== '}') rest of
71 (_, "") -> error ("Invalid Clark notation: " <> show full)
72 (ns, local) -> XmlName "" (Text.pack ns) (Text.pack $ List.drop 1 local)
73 fromString local = XmlName "" "" (Text.pack local)
74
75 xmlLocalName :: Text -> XmlName
76 xmlLocalName = XmlName "" ""
77
78 -- ** Type 'XmlLeaf'
79 data XmlLeaf
80 = XmlAttr XmlName Text
81 | XmlComment Text
82 | XmlText Text
83 deriving (Eq,Ord,Show)
84
85 -- * Type 'InhXml'
86 data InhXml
87 = InhXml
88 { inhXml_figure :: Bool
89 , inhXml_tree0 :: [Pos -> XMLs -> XML]
90 , inhXml_titles :: Seq Tokens
91 }
92 inhXml :: InhXml
93 inhXml = InhXml
94 { inhXml_figure = False
95 , inhXml_tree0 = []
96 , inhXml_titles = mempty
97 }
98
99 mimetype :: Text -> Maybe Text
100 mimetype "hs" = Just "text/x-haskell"
101 mimetype "sh" = Just "text/x-shellscript"
102 mimetype "shell" = Just "text/x-shellscript"
103 mimetype "shellscript" = Just "text/x-shellscript"
104 mimetype _ = Nothing
105
106 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
107 xmlPhantom n bp = TreeN (Cell bp bp n)
108 xmlPara :: Pos -> XMLs -> XML
109 xmlPara = xmlPhantom "para"
110 xmlTitle :: Pos -> XMLs -> XML
111 xmlTitle = xmlPhantom "title"
112 xmlName :: Pos -> XMLs -> XML
113 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
114 xmlName bp ts = xmlPhantom "name" bp ts
115
116 xmlDocument :: TCTs -> XMLs
117 xmlDocument trees =
118 case Seq.viewl trees of
119 TreeN (unCell -> KeySection{}) vs :< ts ->
120 case spanlTokens vs of
121 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
122 let vs'' =
123 case Seq.findIndexL
124 (\case
125 TreeN (unCell -> KeyColon "about" _) _ -> True
126 _ -> False) vs' of
127 Just{} -> vs'
128 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
129 in
130 xmlTCTs inhXml
131 { inhXml_titles = titles
132 , inhXml_tree0 = List.repeat xmlPara
133 } vs'' <>
134 xmlTCTs inhXml ts
135 _ -> xmlTCTs inhXml trees
136 _ -> xmlTCTs inhXml trees
137
138 xmlTCTs :: InhXml -> TCTs -> XMLs
139 xmlTCTs inh_orig = go inh_orig
140 where
141 go :: InhXml -> TCTs -> XMLs
142 go inh trees =
143 case Seq.viewl trees of
144 TreeN (Cell bp ep (KeyBar n _)) _ :< _
145 | (body,ts) <- spanlBar n trees
146 , not (null body) ->
147 (<| go inh ts) $
148 TreeN (Cell bp ep "artwork") $
149 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
150 body >>= xmlTCT inh{inhXml_tree0=[]}
151
152 TreeN key@(unCell -> KeyColon n _) cs :< ts
153 | (cs',ts') <- spanlKeyColon n ts
154 , not (null cs') ->
155 go inh $ TreeN key (cs<>cs') <| ts'
156
157 TreeN (Cell bp ep KeyBrackets{}) _ :< _
158 | (rl,ts) <- spanlBrackets trees
159 , not (null rl) ->
160 (<| go inh ts) $
161 TreeN (Cell bp ep "rl") $
162 rl >>= xmlTCT inh_orig
163
164 _ | (ul,ts) <- spanlItems (==KeyDash) trees
165 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
166 (<| go inh ts) $
167 TreeN (Cell bp ep "ul") $
168 ul >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
169
170 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
171 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
172 (<| go inh ts) $
173 TreeN (Cell bp ep "ol") $
174 ol >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
175
176 t@(Tree0 toks) :< ts | isTokenElem toks ->
177 xmlTCT inh_orig t <>
178 go inh ts
179
180 t@(Tree0 toks) :< ts ->
181 case inhXml_tree0 inh of
182 [] ->
183 xmlTCT inh_orig t <>
184 go inh{inhXml_tree0=[]} ts
185 x:xs ->
186 case Seq.viewl toks of
187 EmptyL -> go inh{inhXml_tree0=xs} ts
188 Cell bp _ep _ :< _ ->
189 (<| go inh{inhXml_tree0=xs} ts) $
190 x bp $
191 xmlTCT inh_orig t
192
193 t:<ts ->
194 xmlTCT inh_orig t <>
195 go inh ts
196
197 _ -> mempty
198
199 xmlTCT :: InhXml -> TCT -> XMLs
200 xmlTCT inh tr =
201 case tr of
202 TreeN (Cell bp ep KeySection{}) ts ->
203 let (attrs,body) = partitionAttributesChildren ts in
204 let inh' = inh{inhXml_tree0 = xmlTitle : List.repeat xmlPara} in
205 Seq.singleton $
206 TreeN (Cell bp ep "section") $
207 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
208 xmlTCTs inh' body
209 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
210 let (attrs,body) = partitionAttributesChildren ts in
211 let inh' = inh { inhXml_tree0 =
212 case kn of
213 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
214 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
215 "quote" -> xmlTitle : List.repeat xmlPara
216 "author" -> List.repeat xmlName
217 _ -> []
218 } in
219 case () of
220 _ | kn == "about" -> xmlAbout inh' key attrs body
221 _ | inhXml_figure inh && not (kn`List.elem`elems) ->
222 TreeN (Cell bp ep "figure")
223 (xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs)) <|
224 case toList body of
225 [Tree0{}] -> xmlTCTs inh'{inhXml_tree0 = List.repeat xmlPara} body
226 _ -> xmlTCTs inh'{inhXml_tree0 = xmlTitle : List.repeat xmlPara} body
227 _ -> Seq.singleton $ xmlKey inh' key attrs body
228 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
229 Tree0 ts -> xmlTokens ts
230
231 xmlAbout ::
232 InhXml ->
233 Cell Key -> Seq (Cell (XmlName, Text)) ->
234 TCTs -> XMLs
235 xmlAbout inh key attrs body =
236 Seq.singleton $
237 xmlKey inh key attrs $
238 case Seq.viewl (inhXml_titles inh) of
239 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
240 ((<$> inhXml_titles inh) $ \title ->
241 TreeN (Cell bt bt $ KeyColon "title" "") $
242 Seq.singleton $ Tree0 title)
243 <> body
244 _ -> body
245
246 xmlKey :: InhXml -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
247 xmlKey inh (Cell bp ep key) attrs ts =
248 case key of
249 KeyColon n _wh -> d_key n
250 KeyGreat n _wh -> d_key n
251 KeyEqual n _wh -> d_key n
252 KeyBar n _wh -> d_key n
253 KeyDot _n -> TreeN (cell "li") $ xmlTCTs inh ts
254 KeyDash -> TreeN (cell "li") $ xmlTCTs inh ts
255 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
256 where
257 com :: TL.Text
258 com =
259 Write.text Write.config_text $
260 TreeSeq.mapAlsoKey
261 (cell1 . unCell)
262 (\_path -> fmap $ cell1 . unCell) <$> ts
263 KeyLower n as -> TreeN (cell "artwork") $ xmlTCTs inh ts
264 KeyBrackets ident ->
265 let inh' = inh{inhXml_figure = False} in
266 TreeN (cell "reference") $
267 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) attrs) <>
268 xmlTCTs inh' ts
269 KeyDotSlash p ->
270 TreeN (cell "include") $
271 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
272 xmlTCTs inh ts
273 where
274 cell :: a -> Cell a
275 cell = Cell bp ep
276 d_key :: Text -> XML
277 d_key n =
278 TreeN (cell $ xmlLocalName n) $
279 xmlAttrs attrs <>
280 xmlTCTs inh ts
281
282 xmlTokens :: Tokens -> XMLs
283 xmlTokens tok = goTokens tok
284 where
285 go :: Cell Token -> XMLs
286 go (Cell bp ep tk) =
287 case tk of
288 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
289 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
290 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
291 TokenLink lnk -> Seq.singleton $
292 TreeN (cell "eref") $
293 xmlAttrs [cell ("to",lnk)] |>
294 Tree0 (cell $ XmlText lnk)
295 TokenPair PairBracket ts | to <- Write.t_Tokens ts
296 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
297 Seq.singleton $
298 TreeN (cell "rref") $
299 xmlAttrs [cell ("to",TL.toStrict to)]
300 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts
301 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts
302 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts
303 TokenPair PairFrenchquote toks@ts ->
304 Seq.singleton $
305 TreeN (cell "q") $
306 case ts of
307 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
308 case Seq.viewr ls of
309 m :> Cell br er (TokenPlain r) ->
310 goTokens $
311 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
312 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
313 _ ->
314 goTokens $
315 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
316 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
317 goTokens $
318 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
319 _ -> goTokens toks
320 TokenPair PairHash (toList -> [unCell -> TokenPlain t]) ->
321 Seq.singleton $
322 TreeN (cell "ref") $
323 xmlAttrs [cell ("to",t)]
324 TokenPair (PairElem name attrs) ts ->
325 Seq.singleton $
326 TreeN (cell $ xmlLocalName name) $
327 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
328 goTokens ts
329 TokenPair p ts ->
330 let (o,c) = pairBorders p ts in
331 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
332 goTokens ts `unionXml`
333 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
334 where
335 cell :: a -> Cell a
336 cell = Cell bp ep
337
338 goTokens :: Tokens -> XMLs
339 goTokens toks =
340 case Seq.viewl toks of
341 Cell bp _ep (TokenPair PairParen paren)
342 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
343 :< ts) ->
344 (<| goTokens ts) $
345 case bracket of
346 (toList -> [Cell bl el (TokenLink lnk)]) ->
347 TreeN (Cell bp eb "eref") $
348 xmlAttrs [Cell bl el ("to",lnk)] <>
349 goTokens paren
350 _ ->
351 TreeN (Cell bp eb "rref") $
352 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Write.t_Tokens bracket)] <>
353 goTokens paren
354 t :< ts -> go t `unionXml` goTokens ts
355 Seq.EmptyL -> mempty
356
357 -- | Unify two 'XMLs', merging border 'XmlText's if any.
358 unionXml :: XMLs -> XMLs -> XMLs
359 unionXml x y =
360 case (Seq.viewr x, Seq.viewl y) of
361 (xs :> x0, y0 :< ys) ->
362 case (x0,y0) of
363 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
364 xs `unionXml`
365 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
366 ys
367 _ -> x <> y
368 (Seq.EmptyR, _) -> y
369 (_, Seq.EmptyL) -> x
370
371
372 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
373 spanlBar name = first unKeyBar . spanBar
374 where
375 unKeyBar :: TCTs -> TCTs
376 unKeyBar = (=<<) $ \case
377 TreeN (unCell -> KeyBar{}) ts -> ts
378 _ -> mempty
379 spanBar =
380 Seq.spanl $ \case
381 TreeN (unCell -> KeyBar n _) _ | n == name -> True
382 _ -> False
383
384 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
385 spanlItems liKey ts =
386 let (lis, ts') = spanLIs ts in
387 foldl' accumLIs (mempty,ts') lis
388 where
389 spanLIs = Seq.spanl $ \case
390 TreeN (unCell -> liKey -> True) _ -> True
391 Tree0 toks ->
392 (`any` toks) $ \case
393 (unCell -> TokenPair (PairElem "li" _) _) -> True
394 _ -> False
395 {-
396 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
397 [unCell -> TokenPair (PairElem "li" _) _] -> True
398 _ -> False
399 -}
400 _ -> False
401 accumLIs acc@(oks,kos) t =
402 case t of
403 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
404 Tree0 toks ->
405 let (ok,ko) =
406 (`Seq.spanl` toks) $ \tok ->
407 case unCell tok of
408 TokenPair (PairElem "li" _) _ -> True
409 TokenPlain txt -> Char.isSpace`Text.all`txt
410 _ -> False in
411 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
412 , if null ko then kos else Tree0 ko<|kos )
413 _ -> acc
414 rmTokenPlain =
415 Seq.filter $ \case
416 (unCell -> TokenPlain{}) -> False
417 _ -> True
418
419 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
420 spanlKeyColon name =
421 Seq.spanl $ \case
422 TreeN (unCell -> KeyBar n _) _ -> n == name
423 TreeN (unCell -> KeyGreat n _) _ -> n == name
424 _ -> False
425
426 spanlBrackets :: TCTs -> (TCTs, TCTs)
427 spanlBrackets =
428 Seq.spanl $ \case
429 TreeN (unCell -> KeyBrackets{}) _ -> True
430 _ -> False
431
432 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
433 spanlTokens =
434 first ((\case
435 Tree0 ts -> ts
436 _ -> undefined) <$>) .
437 Seq.spanl (\case
438 Tree0{} -> True
439 _ -> False)
440
441 getAttrId :: TCTs -> Text
442 getAttrId ts =
443 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
444 Just (Tree0 toks) -> TL.toStrict $ Write.t_Tokens toks
445 _ -> ""
446
447 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
448 setXmlAttr a@(unCell -> (k, _v)) as =
449 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
450 Just idx -> Seq.update idx a as
451 Nothing -> a <| as
452
453 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
454 defXmlAttr a@(unCell -> (k, _v)) as =
455 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
456 Just _idx -> as
457 Nothing -> a <| as
458
459 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
460 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
461
462 {-
463 xmlAttr :: XmlAttrs -> (Text,Attr) -> XmlAttrs
464 xmlAttr acc (_,Attr{..}) = Map.insert (xmlLocalName attr_name) attr_value acc
465 -- TODO: conflict
466 -}
467
468 {-
469 d_Attributes :: XmlAttrs -> DTC -> DTC
470 d_Attributes = flip $ Map.foldrWithKey $ \n v ->
471 B.AddCustomAttribute (B.Text n) (B.Text v)
472 -}
473
474 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
475 partitionAttributesChildren ts = (attrs,cs)
476 where
477 (as,cs) = (`Seq.partition` ts) $ \case
478 TreeN (unCell -> KeyEqual{}) _cs -> True
479 _ -> False
480 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
481 attr = \case
482 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
483 Cell bp ep (xmlLocalName n, v)
484 where
485 v = TL.toStrict $
486 Write.text Write.config_text{Write.config_text_escape = False} $
487 TreeSeq.mapAlsoKey (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
488 _ -> undefined
489
490 elems :: Set Text
491 elems =
492 [ "about"
493 , "abstract"
494 , "address"
495 , "alias"
496 , "annotation"
497 , "area"
498 , "artwork"
499 , "aside"
500 , "audio"
501 , "author"
502 , "authors"
503 , "bcp14"
504 , "br"
505 , "call"
506 , "city"
507 , "code"
508 , "comment"
509 , "comments"
510 , "country"
511 , "date"
512 , "dd"
513 , "define"
514 , "del"
515 , "div"
516 , "dl"
517 , "document"
518 , "dt"
519 , "editor"
520 , "email"
521 , "embed"
522 , "eref"
523 , "fax"
524 , "feed"
525 , "feedback"
526 , "figure"
527 , "filter"
528 , "format"
529 , "from"
530 , "h"
531 , "hi"
532 , "html5"
533 , "i"
534 , "index"
535 , "iref"
536 , "keyword"
537 , "li"
538 , "link"
539 , "note"
540 , "ol"
541 , "organization"
542 , "para"
543 , "postamble"
544 , "preamble"
545 , "q"
546 , "quote"
547 , "ref"
548 , "reference"
549 , "region"
550 , "rl"
551 , "rref"
552 , "sc"
553 , "section"
554 , "serie"
555 , "source"
556 , "span"
557 , "street"
558 , "style"
559 , "sub"
560 , "sup"
561 , "table"
562 , "tbody"
563 , "td"
564 , "tel"
565 , "tfoot"
566 , "th"
567 , "thead"
568 , "toc"
569 , "tof"
570 , "tr"
571 , "tt"
572 , "ul"
573 , "uri"
574 , "video"
575 , "workgroup"
576 , "xml"
577 , "zipcode"
578 ]