]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Use Tree for Token.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Default.Class (Default(..))
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
21 import Data.Set (Set)
22 import Data.Text (Text)
23 import Data.TreeSeq.Strict (Tree(..))
24 import GHC.Exts (toList)
25 import Prelude (undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text as Text
30 import qualified Data.Text.Lazy as TL
31 import qualified Language.TCT.Write.Plain as Plain
32 import qualified System.FilePath as FP
33
34 import Text.Blaze.XML ()
35 import Language.TCT hiding (Parser)
36 import Language.XML
37 import qualified Data.TreeSeq.Strict as TreeSeq
38
39 -- * Type 'Inh'
40 data Inh
41 = Inh
42 { inh_figure :: Bool
43 , inh_tree0 :: [Pos -> XMLs -> XML]
44 , inh_titles :: Seq Tokens
45 }
46 instance Default Inh where
47 def = Inh
48 { inh_figure = False
49 , inh_tree0 = []
50 , inh_titles = mempty
51 }
52
53 -- * Class 'Xmlify'
54 class Xmlify a where
55 xmlify :: Inh -> a -> XMLs
56 instance Xmlify TCTs where
57 xmlify inh_orig = go inh_orig
58 where
59 go :: Inh -> TCTs -> XMLs
60 go inh trees =
61 case Seq.viewl trees of
62 TreeN (Cell bp ep (KeyBar n _)) _ :< _
63 | (body,ts) <- spanlBar n trees
64 , not (null body) ->
65 (<| go inh ts) $
66 TreeN (Cell bp ep "artwork") $
67 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
68 body >>= xmlify inh{inh_tree0=[]}
69
70 TreeN key@(unCell -> KeyColon n _) cs :< ts
71 | (cs',ts') <- spanlKeyColon n ts
72 , not (null cs') ->
73 go inh $ TreeN key (cs<>cs') <| ts'
74
75 TreeN (Cell bp ep KeyBrackets{}) _ :< _
76 | (rl,ts) <- spanlBrackets trees
77 , not (null rl) ->
78 (<| go inh ts) $
79 TreeN (Cell bp ep "references") $
80 rl >>= xmlify inh_orig
81
82 _ | (ul,ts) <- spanlItems (==KeyDash) trees
83 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
84 (<| go inh ts) $
85 TreeN (Cell bp ep "ul") $
86 ul >>= xmlify inh{inh_tree0=List.repeat xmlPara}
87
88 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
89 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
90 (<| go inh ts) $
91 TreeN (Cell bp ep "ol") $
92 ol >>= xmlify inh{inh_tree0=List.repeat xmlPara}
93
94 t@(Tree0 toks) :< ts ->
95 case inh_tree0 inh of
96 [] -> xmlify inh_orig t <> go inh ts
97 _ | isTokenElem toks -> xmlify inh_orig t <> go inh ts
98 tree0:inh_tree0 ->
99 (case Seq.viewl toks of
100 EmptyL -> id
101 (posTree -> bp) :< _ -> (tree0 bp (xmlify inh_orig t) <|)) $
102 go inh{inh_tree0} ts
103
104 t:<ts ->
105 xmlify inh_orig t <>
106 go inh ts
107
108 _ -> mempty
109 instance Xmlify TCT where
110 xmlify inh tr =
111 case tr of
112 TreeN (Cell bp ep KeySection{}) ts ->
113 let (attrs,body) = partitionAttributesChildren ts in
114 let inh' = inh
115 { inh_tree0 = xmlTitle : List.repeat xmlPara
116 , inh_figure = True
117 } in
118 Seq.singleton $
119 TreeN (Cell bp ep "section") $
120 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
121 xmlify inh' body
122
123 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
124 let (attrs,body) = partitionAttributesChildren ts in
125 let inh' = inh { inh_tree0 =
126 case kn of
127 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
128 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
129 "serie" -> List.repeat xmlName
130 "author" -> List.repeat xmlName
131 "editor" -> List.repeat xmlName
132 "org" -> List.repeat xmlName
133 _ -> []
134 } in
135 case () of
136 _ | kn == "about" -> xmlAbout inh' key attrs body
137
138 _ | inh_figure inh && not (kn`List.elem`elems) ->
139 Seq.singleton $
140 TreeN (Cell bp ep "figure") $
141 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
142 case toList body of
143 [Tree0{}] -> xmlify inh'{inh_tree0 = List.repeat xmlPara} body
144 _ -> xmlify inh'{inh_tree0 = xmlTitle : List.repeat xmlPara} body
145
146 _ -> Seq.singleton $ xmlKey inh' key attrs body
147
148 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
149
150 Tree0 ts -> xmlify inh ts
151 instance Xmlify Tokens where
152 xmlify inh toks =
153 case Seq.viewl toks of
154 TreeN (Cell bp _ep PairParen) paren
155 :< (Seq.viewl -> TreeN (Cell bb eb PairBracket) bracket
156 :< ts) ->
157 (<| xmlify inh ts) $
158 case bracket of
159 (toList -> [Tree0 (Cell bl el (TokenLink lnk))]) ->
160 TreeN (Cell bp eb "eref") $
161 xmlAttrs [Cell bl el ("to",lnk)] <>
162 xmlify inh paren
163 _ ->
164 TreeN (Cell bp eb "rref") $
165 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Plain.textify bracket)] <>
166 xmlify inh paren
167 t :< ts -> xmlify inh t `unionXml` xmlify inh ts
168 Seq.EmptyL -> mempty
169 instance Xmlify Token where
170 xmlify inh (TreeN (Cell bp ep p) ts) =
171 case p of
172 PairBracket | to <- Plain.textify ts
173 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
174 Seq.singleton $
175 TreeN (cell "rref") $
176 xmlAttrs [cell ("to",TL.toStrict to)]
177 PairStar -> Seq.singleton $ TreeN (cell "b") $ xmlify inh ts
178 PairSlash -> Seq.singleton $ TreeN (cell "i") $ xmlify inh ts
179 PairBackquote -> Seq.singleton $ TreeN (cell "code") $ xmlify inh ts
180 PairFrenchquote ->
181 Seq.singleton $
182 TreeN (cell "q") $
183 case ts of
184 (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) ->
185 case Seq.viewr ls of
186 m :> Tree0 (Cell br er (TokenPlain r)) ->
187 xmlify inh $
188 Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)))
189 <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))))
190 _ ->
191 xmlify inh $
192 Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls
193 (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) ->
194 xmlify inh $
195 rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r)))
196 _ -> xmlify inh ts
197 PairHash ->
198 Seq.singleton $
199 TreeN (cell "ref") $
200 xmlAttrs [cell ("to",TL.toStrict $ Plain.textify ts)]
201 PairElem name attrs ->
202 Seq.singleton $
203 TreeN (cell $ xmlLocalName name) $
204 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) ->
205 cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
206 xmlify inh ts
207 _ ->
208 let (o,c) = pairBorders p ts in
209 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
210 xmlify inh ts `unionXml`
211 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
212 where
213 cell :: a -> Cell a
214 cell = Cell bp ep
215 xmlify _inh (Tree0 (Cell bp ep tok)) =
216 case tok of
217 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
218 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
219 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
220 TokenLink lnk -> Seq.singleton $
221 TreeN (cell "eref") $
222 xmlAttrs [cell ("to",lnk)]
223 where
224 cell :: a -> Cell a
225 cell = Cell bp ep
226
227 mimetype :: Text -> Maybe Text
228 mimetype "hs" = Just "text/x-haskell"
229 mimetype "sh" = Just "text/x-shellscript"
230 mimetype "shell" = Just "text/x-shellscript"
231 mimetype "shellscript" = Just "text/x-shellscript"
232 mimetype _ = Nothing
233
234 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
235 xmlPhantom n bp = TreeN (Cell bp bp n)
236 xmlPara :: Pos -> XMLs -> XML
237 xmlPara = xmlPhantom "para"
238 xmlTitle :: Pos -> XMLs -> XML
239 xmlTitle = xmlPhantom "title"
240 xmlName :: Pos -> XMLs -> XML
241 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
242 xmlName bp ts = xmlPhantom "name" bp ts
243
244 xmlDocument :: TCTs -> XMLs
245 xmlDocument trees =
246 case Seq.viewl trees of
247 TreeN (unCell -> KeySection{}) vs :< ts ->
248 case spanlTokens vs of
249 (titles@(Seq.viewl -> (Seq.viewl -> (posTree -> bp) :< _) :< _), vs') ->
250 let vs'' =
251 case Seq.findIndexL
252 (\case
253 TreeN (unCell -> KeyColon "about" _) _ -> True
254 _ -> False) vs' of
255 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
256 Just{} -> vs' in
257 xmlify def
258 { inh_titles = titles
259 , inh_figure = True
260 , inh_tree0 = List.repeat xmlPara
261 } vs'' <>
262 xmlify def ts
263 _ -> xmlify def trees
264 _ -> xmlify def trees
265
266 xmlAbout ::
267 Inh ->
268 Cell Key -> Seq (Cell (XmlName, Text)) ->
269 TCTs -> XMLs
270 xmlAbout inh key attrs body =
271 Seq.singleton $
272 xmlKey inh key attrs $
273 case Seq.viewl (inh_titles inh) of
274 (Seq.viewl -> (posTree -> bt) :< _) :< _ ->
275 ((<$> inh_titles inh) $ \title ->
276 TreeN (Cell bt bt $ KeyColon "title" "") $
277 Seq.singleton $ Tree0 title)
278 <> body
279 _ -> body
280
281 xmlKey :: Inh -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
282 xmlKey inh (Cell bp ep key) attrs ts =
283 case key of
284 KeyColon n _wh -> d_key n
285 KeyGreat n _wh -> d_key n
286 KeyEqual n _wh -> d_key n
287 KeyBar n _wh -> d_key n
288 KeyDot _n -> TreeN (cell "li") $ xmlify inh ts
289 KeyDash -> TreeN (cell "li") $ xmlify inh ts
290 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
291 where
292 com :: TL.Text
293 com =
294 Plain.text def $
295 TreeSeq.mapAlsoNode
296 (cell1 . unCell)
297 (\_k -> fmap $
298 TreeSeq.mapAlsoNode
299 (cell1 . unCell)
300 (\_k' -> cell1 . unCell)) <$> ts
301 KeyLower n as -> TreeN (cell "artwork") $ xmlify inh ts
302 KeyBrackets ident ->
303 let inh' = inh{inh_figure = False} in
304 let (attrs',body) = partitionAttributesChildren ts in
305 TreeN (cell "reference") $
306 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) (attrs<>attrs')) <>
307 xmlify inh'{inh_tree0 = xmlTitle : xmlTitle : List.repeat xmlPara} body
308 KeyDotSlash p ->
309 TreeN (cell "include") $
310 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
311 xmlify inh ts
312 where
313 cell :: a -> Cell a
314 cell = Cell bp ep
315 d_key :: Text -> XML
316 d_key n =
317 TreeN (cell $ xmlLocalName n) $
318 xmlAttrs attrs <>
319 xmlify inh ts
320
321 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
322 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
323
324 -- | Unify two 'XMLs', merging border 'XmlText's if any.
325 unionXml :: XMLs -> XMLs -> XMLs
326 unionXml x y =
327 case (Seq.viewr x, Seq.viewl y) of
328 (xs :> x0, y0 :< ys) ->
329 case (x0,y0) of
330 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
331 xs `unionXml`
332 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
333 ys
334 _ -> x <> y
335 (Seq.EmptyR, _) -> y
336 (_, Seq.EmptyL) -> x
337
338 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
339 spanlBar name = first unKeyBar . spanBar
340 where
341 unKeyBar :: TCTs -> TCTs
342 unKeyBar = (=<<) $ \case
343 TreeN (unCell -> KeyBar{}) ts -> ts
344 _ -> mempty
345 spanBar =
346 Seq.spanl $ \case
347 TreeN (unCell -> KeyBar n _) _ | n == name -> True
348 _ -> False
349
350 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
351 spanlItems liKey ts =
352 let (lis, ts') = spanLIs ts in
353 foldl' accumLIs (mempty,ts') lis
354 where
355 spanLIs = Seq.spanl $ \case
356 TreeN (unCell -> liKey -> True) _ -> True
357 Tree0 toks ->
358 (`any` toks) $ \case
359 TreeN (unCell -> PairElem "li" _) _ -> True
360 _ -> False
361 {-
362 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
363 [unCell -> TokenPair (PairElem "li" _) _] -> True
364 _ -> False
365 -}
366 _ -> False
367 accumLIs acc@(oks,kos) t =
368 case t of
369 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
370 Tree0 toks ->
371 let (ok,ko) =
372 (`Seq.spanl` toks) $ \case
373 TreeN (unCell -> PairElem "li" _) _ -> True
374 Tree0 (unCell -> TokenPlain txt) -> Char.isSpace`Text.all`txt
375 _ -> False in
376 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
377 , if null ko then kos else Tree0 ko<|kos )
378 _ -> acc
379 rmTokenPlain =
380 Seq.filter $ \case
381 (Tree0 (unCell -> TokenPlain{})) -> False
382 _ -> True
383
384 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
385 spanlKeyColon name =
386 Seq.spanl $ \case
387 TreeN (unCell -> KeyBar n _) _ -> n == name
388 TreeN (unCell -> KeyGreat n _) _ -> n == name
389 _ -> False
390
391 spanlBrackets :: TCTs -> (TCTs, TCTs)
392 spanlBrackets =
393 Seq.spanl $ \case
394 TreeN (unCell -> KeyBrackets{}) _ -> True
395 _ -> False
396
397 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
398 spanlTokens =
399 first ((\case
400 Tree0 ts -> ts
401 _ -> undefined) <$>) .
402 Seq.spanl (\case
403 Tree0{} -> True
404 _ -> False)
405
406 getAttrId :: TCTs -> Text
407 getAttrId ts =
408 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
409 Just (Tree0 toks) -> TL.toStrict $ Plain.textify toks
410 _ -> ""
411
412 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
413 setXmlAttr a@(unCell -> (k, _v)) as =
414 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
415 Just idx -> Seq.update idx a as
416 Nothing -> a <| as
417
418 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
419 defXmlAttr a@(unCell -> (k, _v)) as =
420 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
421 Just _idx -> as
422 Nothing -> a <| as
423
424 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
425 partitionAttributesChildren ts = (attrs,cs)
426 where
427 (as,cs) = (`Seq.partition` ts) $ \case
428 TreeN (unCell -> KeyEqual{}) _cs -> True
429 _ -> False
430 attrs = attr <$> as
431 attr = \case
432 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
433 Cell bp ep (xmlLocalName n, v)
434 where
435 v = TL.toStrict $
436 Plain.text def{Plain.state_escape = False} $
437 TreeSeq.mapAlsoNode
438 (cell1 . unCell)
439 (\_k -> fmap $
440 TreeSeq.mapAlsoNode
441 (cell1 . unCell)
442 (\_k' -> cell1 . unCell)) <$> a
443 _ -> undefined
444
445 elems :: Set Text
446 elems =
447 [ "about"
448 , "abstract"
449 , "address"
450 , "alias"
451 , "annotation"
452 , "area"
453 , "artwork"
454 , "aside"
455 , "audio"
456 , "author"
457 , "authors"
458 , "bcp14"
459 , "br"
460 , "call"
461 , "city"
462 , "code"
463 , "comment"
464 , "comments"
465 , "country"
466 , "date"
467 , "dd"
468 , "define"
469 , "del"
470 , "div"
471 , "dl"
472 , "document"
473 , "dt"
474 , "editor"
475 , "email"
476 , "embed"
477 , "eref"
478 , "fax"
479 , "feed"
480 , "feedback"
481 , "figure"
482 , "filter"
483 , "format"
484 , "from"
485 , "h"
486 , "hi"
487 , "html5"
488 , "i"
489 , "index"
490 , "iref"
491 , "keyword"
492 , "li"
493 , "link"
494 , "name"
495 , "note"
496 , "ol"
497 , "organization"
498 , "para"
499 , "postamble"
500 , "preamble"
501 , "q"
502 , "ref"
503 , "reference"
504 , "references"
505 , "region"
506 , "rref"
507 , "sc"
508 , "section"
509 , "serie"
510 , "source"
511 , "span"
512 , "street"
513 , "style"
514 , "sub"
515 , "sup"
516 , "table"
517 , "tbody"
518 , "td"
519 , "tel"
520 , "tfoot"
521 , "title"
522 , "th"
523 , "thead"
524 , "toc"
525 , "tof"
526 , "tr"
527 , "tt"
528 , "u"
529 , "ul"
530 , "uri"
531 , "version"
532 , "video"
533 , "workgroup"
534 , "xml"
535 , "zipcode"
536 ]