]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Add Xmlify.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Default.Class (Default(..))
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
21 import Data.Set (Set)
22 import Data.Text (Text)
23 import Data.TreeSeq.Strict (Tree(..))
24 import GHC.Exts (toList)
25 import Prelude (undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text as Text
30 import qualified Data.Text.Lazy as TL
31 import qualified Language.TCT.Write.Plain as Plain
32 import qualified System.FilePath as FP
33
34 import Text.Blaze.XML ()
35 import Language.TCT hiding (Parser)
36 import Language.XML
37 import qualified Data.TreeSeq.Strict as TreeSeq
38
39 -- * Type 'Inh'
40 data Inh
41 = Inh
42 { inh_figure :: Bool
43 , inh_tree0 :: [Pos -> XMLs -> XML]
44 , inh_titles :: Seq Tokens
45 }
46 instance Default Inh where
47 def = Inh
48 { inh_figure = False
49 , inh_tree0 = []
50 , inh_titles = mempty
51 }
52
53 -- * Class 'Xmlify'
54 class Xmlify a where
55 xmlify :: Inh -> a -> XMLs
56 instance Xmlify TCTs where
57 xmlify inh_orig = go inh_orig
58 where
59 go :: Inh -> TCTs -> XMLs
60 go inh trees =
61 case Seq.viewl trees of
62 TreeN (Cell bp ep (KeyBar n _)) _ :< _
63 | (body,ts) <- spanlBar n trees
64 , not (null body) ->
65 (<| go inh ts) $
66 TreeN (Cell bp ep "artwork") $
67 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
68 body >>= xmlify inh{inh_tree0=[]}
69
70 TreeN key@(unCell -> KeyColon n _) cs :< ts
71 | (cs',ts') <- spanlKeyColon n ts
72 , not (null cs') ->
73 go inh $ TreeN key (cs<>cs') <| ts'
74
75 TreeN (Cell bp ep KeyBrackets{}) _ :< _
76 | (rl,ts) <- spanlBrackets trees
77 , not (null rl) ->
78 (<| go inh ts) $
79 TreeN (Cell bp ep "references") $
80 rl >>= xmlify inh_orig
81
82 _ | (ul,ts) <- spanlItems (==KeyDash) trees
83 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
84 (<| go inh ts) $
85 TreeN (Cell bp ep "ul") $
86 ul >>= xmlify inh{inh_tree0=List.repeat xmlPara}
87
88 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
89 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
90 (<| go inh ts) $
91 TreeN (Cell bp ep "ol") $
92 ol >>= xmlify inh{inh_tree0=List.repeat xmlPara}
93
94 t@(Tree0 toks) :< ts | isTokenElem toks ->
95 xmlify inh_orig t <>
96 go inh ts
97
98 t@(Tree0 toks) :< ts ->
99 case inh_tree0 inh of
100 [] ->
101 xmlify inh_orig t <>
102 go inh{inh_tree0=[]} ts
103 x:xs ->
104 case Seq.viewl toks of
105 EmptyL -> go inh{inh_tree0=xs} ts
106 Cell bp _ep _ :< _ ->
107 (<| go inh{inh_tree0=xs} ts) $
108 x bp $
109 xmlify inh_orig t
110
111 t:<ts ->
112 xmlify inh_orig t <>
113 go inh ts
114
115 _ -> mempty
116 instance Xmlify TCT where
117 xmlify inh tr =
118 case tr of
119 TreeN (Cell bp ep KeySection{}) ts ->
120 let (attrs,body) = partitionAttributesChildren ts in
121 let inh' = inh
122 { inh_tree0 = xmlTitle : List.repeat xmlPara
123 , inh_figure = True
124 } in
125 Seq.singleton $
126 TreeN (Cell bp ep "section") $
127 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
128 xmlify inh' body
129
130 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
131 let (attrs,body) = partitionAttributesChildren ts in
132 let inh' = inh { inh_tree0 =
133 case kn of
134 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
135 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
136 "serie" -> List.repeat xmlName
137 "author" -> List.repeat xmlName
138 "editor" -> List.repeat xmlName
139 "org" -> List.repeat xmlName
140 _ -> []
141 } in
142 case () of
143 _ | kn == "about" -> xmlAbout inh' key attrs body
144
145 _ | inh_figure inh && not (kn`List.elem`elems) ->
146 Seq.singleton $
147 TreeN (Cell bp ep "figure") $
148 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
149 case toList body of
150 [Tree0{}] -> xmlify inh'{inh_tree0 = List.repeat xmlPara} body
151 _ -> xmlify inh'{inh_tree0 = xmlTitle : List.repeat xmlPara} body
152
153 _ -> Seq.singleton $ xmlKey inh' key attrs body
154
155 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
156
157 Tree0 ts -> xmlify inh ts
158 instance Xmlify Tokens where
159 xmlify inh toks =
160 case Seq.viewl toks of
161 Cell bp _ep (TokenPair PairParen paren)
162 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
163 :< ts) ->
164 (<| xmlify inh ts) $
165 case bracket of
166 (toList -> [Cell bl el (TokenLink lnk)]) ->
167 TreeN (Cell bp eb "eref") $
168 xmlAttrs [Cell bl el ("to",lnk)] <>
169 xmlify inh paren
170 _ ->
171 TreeN (Cell bp eb "rref") $
172 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Plain.textify bracket)] <>
173 xmlify inh paren
174 t :< ts -> xmlify inh t `unionXml` xmlify inh ts
175 Seq.EmptyL -> mempty
176 instance Xmlify (Cell Token) where
177 xmlify inh (Cell bp ep tk) =
178 case tk of
179 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
180 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
181 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
182 TokenLink lnk -> Seq.singleton $
183 TreeN (cell "eref") $
184 xmlAttrs [cell ("to",lnk)]
185 TokenPair PairBracket ts | to <- Plain.textify ts
186 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
187 Seq.singleton $
188 TreeN (cell "rref") $
189 xmlAttrs [cell ("to",TL.toStrict to)]
190 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ xmlify inh ts
191 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ xmlify inh ts
192 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ xmlify inh ts
193 TokenPair PairFrenchquote toks@ts ->
194 Seq.singleton $
195 TreeN (cell "q") $
196 case ts of
197 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
198 case Seq.viewr ls of
199 m :> Cell br er (TokenPlain r) ->
200 xmlify inh $
201 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
202 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
203 _ ->
204 xmlify inh $
205 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
206 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
207 xmlify inh $
208 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
209 _ -> xmlify inh toks
210 TokenPair PairHash to ->
211 Seq.singleton $
212 TreeN (cell "ref") $
213 xmlAttrs [cell ("to",TL.toStrict $ Plain.textify to)]
214 TokenPair (PairElem name attrs) ts ->
215 Seq.singleton $
216 TreeN (cell $ xmlLocalName name) $
217 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
218 xmlify inh ts
219 TokenPair p ts ->
220 let (o,c) = pairBorders p ts in
221 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
222 xmlify inh ts `unionXml`
223 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
224 where
225 cell :: a -> Cell a
226 cell = Cell bp ep
227
228 mimetype :: Text -> Maybe Text
229 mimetype "hs" = Just "text/x-haskell"
230 mimetype "sh" = Just "text/x-shellscript"
231 mimetype "shell" = Just "text/x-shellscript"
232 mimetype "shellscript" = Just "text/x-shellscript"
233 mimetype _ = Nothing
234
235 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
236 xmlPhantom n bp = TreeN (Cell bp bp n)
237 xmlPara :: Pos -> XMLs -> XML
238 xmlPara = xmlPhantom "para"
239 xmlTitle :: Pos -> XMLs -> XML
240 xmlTitle = xmlPhantom "title"
241 xmlName :: Pos -> XMLs -> XML
242 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
243 xmlName bp ts = xmlPhantom "name" bp ts
244
245 xmlDocument :: TCTs -> XMLs
246 xmlDocument trees =
247 case Seq.viewl trees of
248 TreeN (unCell -> KeySection{}) vs :< ts ->
249 case spanlTokens vs of
250 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
251 let vs'' =
252 case Seq.findIndexL
253 (\case
254 TreeN (unCell -> KeyColon "about" _) _ -> True
255 _ -> False) vs' of
256 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
257 Just{} -> vs' in
258 xmlify def
259 { inh_titles = titles
260 , inh_figure = True
261 , inh_tree0 = List.repeat xmlPara
262 } vs'' <>
263 xmlify def ts
264 _ -> xmlify def trees
265 _ -> xmlify def trees
266
267 xmlAbout ::
268 Inh ->
269 Cell Key -> Seq (Cell (XmlName, Text)) ->
270 TCTs -> XMLs
271 xmlAbout inh key attrs body =
272 Seq.singleton $
273 xmlKey inh key attrs $
274 case Seq.viewl (inh_titles inh) of
275 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
276 ((<$> inh_titles inh) $ \title ->
277 TreeN (Cell bt bt $ KeyColon "title" "") $
278 Seq.singleton $ Tree0 title)
279 <> body
280 _ -> body
281
282 xmlKey :: Inh -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
283 xmlKey inh (Cell bp ep key) attrs ts =
284 case key of
285 KeyColon n _wh -> d_key n
286 KeyGreat n _wh -> d_key n
287 KeyEqual n _wh -> d_key n
288 KeyBar n _wh -> d_key n
289 KeyDot _n -> TreeN (cell "li") $ xmlify inh ts
290 KeyDash -> TreeN (cell "li") $ xmlify inh ts
291 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
292 where
293 com :: TL.Text
294 com =
295 Plain.text def $
296 TreeSeq.mapAlsoNode
297 (cell1 . unCell)
298 (\_path -> fmap $ cell1 . unCell) <$> ts
299 KeyLower n as -> TreeN (cell "artwork") $ xmlify inh ts
300 KeyBrackets ident ->
301 let inh' = inh{inh_figure = False} in
302 let (attrs',body) = partitionAttributesChildren ts in
303 TreeN (cell "reference") $
304 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) (attrs<>attrs')) <>
305 xmlify inh'{inh_tree0 = xmlTitle : xmlTitle : List.repeat xmlPara} body
306 KeyDotSlash p ->
307 TreeN (cell "include") $
308 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
309 xmlify inh ts
310 where
311 cell :: a -> Cell a
312 cell = Cell bp ep
313 d_key :: Text -> XML
314 d_key n =
315 TreeN (cell $ xmlLocalName n) $
316 xmlAttrs attrs <>
317 xmlify inh ts
318
319 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
320 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
321
322 -- | Unify two 'XMLs', merging border 'XmlText's if any.
323 unionXml :: XMLs -> XMLs -> XMLs
324 unionXml x y =
325 case (Seq.viewr x, Seq.viewl y) of
326 (xs :> x0, y0 :< ys) ->
327 case (x0,y0) of
328 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
329 xs `unionXml`
330 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
331 ys
332 _ -> x <> y
333 (Seq.EmptyR, _) -> y
334 (_, Seq.EmptyL) -> x
335
336 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
337 spanlBar name = first unKeyBar . spanBar
338 where
339 unKeyBar :: TCTs -> TCTs
340 unKeyBar = (=<<) $ \case
341 TreeN (unCell -> KeyBar{}) ts -> ts
342 _ -> mempty
343 spanBar =
344 Seq.spanl $ \case
345 TreeN (unCell -> KeyBar n _) _ | n == name -> True
346 _ -> False
347
348 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
349 spanlItems liKey ts =
350 let (lis, ts') = spanLIs ts in
351 foldl' accumLIs (mempty,ts') lis
352 where
353 spanLIs = Seq.spanl $ \case
354 TreeN (unCell -> liKey -> True) _ -> True
355 Tree0 toks ->
356 (`any` toks) $ \case
357 (unCell -> TokenPair (PairElem "li" _) _) -> True
358 _ -> False
359 {-
360 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
361 [unCell -> TokenPair (PairElem "li" _) _] -> True
362 _ -> False
363 -}
364 _ -> False
365 accumLIs acc@(oks,kos) t =
366 case t of
367 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
368 Tree0 toks ->
369 let (ok,ko) =
370 (`Seq.spanl` toks) $ \tok ->
371 case unCell tok of
372 TokenPair (PairElem "li" _) _ -> True
373 TokenPlain txt -> Char.isSpace`Text.all`txt
374 _ -> False in
375 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
376 , if null ko then kos else Tree0 ko<|kos )
377 _ -> acc
378 rmTokenPlain =
379 Seq.filter $ \case
380 (unCell -> TokenPlain{}) -> False
381 _ -> True
382
383 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
384 spanlKeyColon name =
385 Seq.spanl $ \case
386 TreeN (unCell -> KeyBar n _) _ -> n == name
387 TreeN (unCell -> KeyGreat n _) _ -> n == name
388 _ -> False
389
390 spanlBrackets :: TCTs -> (TCTs, TCTs)
391 spanlBrackets =
392 Seq.spanl $ \case
393 TreeN (unCell -> KeyBrackets{}) _ -> True
394 _ -> False
395
396 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
397 spanlTokens =
398 first ((\case
399 Tree0 ts -> ts
400 _ -> undefined) <$>) .
401 Seq.spanl (\case
402 Tree0{} -> True
403 _ -> False)
404
405 getAttrId :: TCTs -> Text
406 getAttrId ts =
407 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
408 Just (Tree0 toks) -> TL.toStrict $ Plain.textify toks
409 _ -> ""
410
411 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
412 setXmlAttr a@(unCell -> (k, _v)) as =
413 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
414 Just idx -> Seq.update idx a as
415 Nothing -> a <| as
416
417 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
418 defXmlAttr a@(unCell -> (k, _v)) as =
419 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
420 Just _idx -> as
421 Nothing -> a <| as
422
423 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
424 partitionAttributesChildren ts = (attrs,cs)
425 where
426 (as,cs) = (`Seq.partition` ts) $ \case
427 TreeN (unCell -> KeyEqual{}) _cs -> True
428 _ -> False
429 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
430 attr = \case
431 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
432 Cell bp ep (xmlLocalName n, v)
433 where
434 v = TL.toStrict $
435 Plain.text def{Plain.state_escape = False} $
436 TreeSeq.mapAlsoNode (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
437 _ -> undefined
438
439 elems :: Set Text
440 elems =
441 [ "about"
442 , "abstract"
443 , "address"
444 , "alias"
445 , "annotation"
446 , "area"
447 , "artwork"
448 , "aside"
449 , "audio"
450 , "author"
451 , "authors"
452 , "bcp14"
453 , "br"
454 , "call"
455 , "city"
456 , "code"
457 , "comment"
458 , "comments"
459 , "country"
460 , "date"
461 , "dd"
462 , "define"
463 , "del"
464 , "div"
465 , "dl"
466 , "document"
467 , "dt"
468 , "editor"
469 , "email"
470 , "embed"
471 , "eref"
472 , "fax"
473 , "feed"
474 , "feedback"
475 , "figure"
476 , "filter"
477 , "format"
478 , "from"
479 , "h"
480 , "hi"
481 , "html5"
482 , "i"
483 , "index"
484 , "iref"
485 , "keyword"
486 , "li"
487 , "link"
488 , "name"
489 , "note"
490 , "ol"
491 , "organization"
492 , "para"
493 , "postamble"
494 , "preamble"
495 , "q"
496 , "ref"
497 , "reference"
498 , "references"
499 , "region"
500 , "rref"
501 , "sc"
502 , "section"
503 , "serie"
504 , "source"
505 , "span"
506 , "street"
507 , "style"
508 , "sub"
509 , "sup"
510 , "table"
511 , "tbody"
512 , "td"
513 , "tel"
514 , "tfoot"
515 , "title"
516 , "th"
517 , "thead"
518 , "toc"
519 , "tof"
520 , "tr"
521 , "tt"
522 , "u"
523 , "ul"
524 , "uri"
525 , "version"
526 , "video"
527 , "workgroup"
528 , "xml"
529 , "zipcode"
530 ]