]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Fix NodePara parsing.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Default.Class (Default(..))
13 import Data.Eq (Eq(..))
14 import Data.Foldable (Foldable(..))
15 import Data.Function (($), (.), id)
16 import Data.Functor ((<$>), (<$))
17 import Data.Maybe (Maybe(..), maybe, fromMaybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|))
21 import Data.Set (Set)
22 import Data.TreeSeq.Strict (Tree(..))
23 import Data.Tuple (uncurry)
24 import Prelude (undefined)
25 import qualified Data.Char as Char
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text.Lazy as TL
29 import qualified Language.TCT.Write.Plain as Plain
30 import qualified System.FilePath as FP
31
32 import Text.Blaze.XML ()
33 import Language.TCT hiding (Parser)
34 import Language.TCT.Debug
35 import Language.XML
36
37 xmlDocument :: Roots -> XMLs
38 xmlDocument trees =
39 -- (`S.evalState` def) $
40 case Seq.viewl trees of
41 Tree (unCell -> NodeHeader HeaderSection{}) vs :< ts ->
42 case spanlNodeToken vs of
43 (titles@(Seq.viewl -> (unTree -> cell_begin -> bp) :< _), vs') ->
44 let vs'' =
45 case Seq.findIndexL
46 (\case
47 Tree (unCell -> NodeHeader (HeaderColon "about" _)) _ -> True
48 _ -> False) vs' of
49 Nothing -> Tree (Cell bp bp $ NodeHeader $ HeaderColon "about" "") mempty <| vs'
50 Just{} -> vs' in
51 xmlify def
52 { inh_titles = titles
53 , inh_figure = True
54 , inh_para = List.repeat xmlPara
55 } vs'' <>
56 xmlify def ts
57 _ -> xmlify def trees
58 _ -> xmlify def trees
59
60 {-
61 -- * Type 'Xmls'
62 type Xmls = S.State State XMLs
63 type Xml = S.State State XML
64 instance Semigroup Xmls where
65 (<>) = liftA2 (<>)
66 instance Monoid Xmls where
67 mempty = return mempty
68 mappend = (<>)
69
70 -- * Type 'State'
71 data State
72 = State
73 { state_pos :: Pos
74 }
75 instance Default State where
76 def = State
77 { state_pos = pos1
78 }
79 -}
80
81 -- * Type 'Inh'
82 data Inh
83 = Inh
84 { inh_figure :: Bool
85 , inh_para :: [Cell () -> XMLs -> XML]
86 , inh_titles :: Roots
87 }
88 instance Default Inh where
89 def = Inh
90 { inh_figure = False
91 , inh_para = []
92 , inh_titles = mempty
93 }
94
95 {-
96 newtype Merge a = Merge a
97 deriving (Functor)
98 instance Semigroup (Merge Roots) where
99 (<>) = unionTokens
100 instance Monad (Merge Roots) where
101 return = Merge
102 Merge m >>= f =
103 foldMap nn
104 -}
105
106 -- * Class 'Xmlify'
107 class Xmlify a where
108 xmlify :: Inh -> a -> XMLs
109 instance Xmlify Roots where
110 xmlify inh roots =
111 case Seq.viewl roots of
112 EmptyL -> mempty
113 l@(Tree cel@(Cell bp _ep nod) ts) :< rs ->
114 case nod of
115 NodeHeader (HeaderBar n _wh)
116 | (span, rest) <- spanlHeaderBar n roots ->
117 let (attrs,body) = partitionAttrs span in
118 (<| xmlify inh rest) $
119 element "artwork" $
120 xmlAttrs (attrs `defaultAttr` Cell bp bp ("type", fromMaybe "text/plain" $ mimetype n)) <>
121 xmlify inh{inh_para=[]} body
122 ----------------------
123 NodeHeader (HeaderGreat n _wh)
124 | (span, rest) <- spanlHeaderGreat n roots ->
125 let (attrs,body) = partitionAttrs span in
126 (<| xmlify inh rest) $
127 element "artwork" $
128 xmlAttrs (attrs `defaultAttr` Cell bp bp ("type", fromMaybe "text/plain" $ mimetype n)) <>
129 xmlify inh{inh_para=[]} (debug0 "body" body)
130 ----------------------
131 NodeHeader (HeaderColon n _wh)
132 | (span, rest) <- spanlHeaderColon n rs
133 , not $ null span ->
134 xmlify inh $ Tree cel (ts<>span) <| rest
135 ----------------------
136 NodeHeader HeaderBrackets{}
137 | (span,rest) <- spanlBrackets roots
138 , not (null span) ->
139 (<| xmlify inh rest) $
140 element "references" $
141 xmlify inh span
142 ----------------------
143 NodeText x
144 | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs ->
145 xmlify inh $ Tree (NodeText <$> (x <$ cel) <> (y <$ cy)) (ts <> ys) <| rs'
146 ----------------------
147 NodePair PairParen
148 | Tree (Cell bb eb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs ->
149 (<| xmlify inh rs') $
150 case bracket of
151 (toList -> [unTree -> Cell bl el (NodeToken (TokenLink lnk))]) ->
152 element "eref" $
153 xmlAttrs [Cell bl el ("to",lnk)] <>
154 xmlify inh ts
155 _ ->
156 element "rref" $
157 xmlAttrs [Cell bb eb ("to",Plain.plainDocument bracket)] <>
158 xmlify inh ts
159 ----------------------
160 _ | (span, rest) <- spanlItems (==HeaderDash) roots
161 , not $ null span ->
162 (<| xmlify inh rest) $
163 element "ul" $
164 span >>= xmlify inh{inh_para=List.repeat xmlPara}
165 ----------------------
166 _ | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
167 , not $ null span ->
168 (<| xmlify inh rest) $
169 element "ol" $
170 span >>= xmlify inh{inh_para=List.repeat xmlPara}
171 ----------------------
172 _ ->
173 xmlify inh l <>
174 xmlify inh rs
175 where
176 element :: XmlName -> XMLs -> XML
177 element n = tree (XmlElem n <$ cel)
178 {-
179 t@(Tree (NodePair (PairElem))) :< ts ->
180 case inh_para inh of
181 [] -> xmlify inh t <> go inh ts
182 _ | isTokenElem toks -> xmlify inh t <> go inh ts
183 tree0:inh_para ->
184 (case Seq.viewl toks of
185 EmptyL -> id
186 (unTree -> cell_begin -> bp) :< _ -> (tree0 bp (xmlify inh t) <|)) $
187 go inh{inh_para} ts
188 -}
189 instance Xmlify Root where
190 xmlify inh (Tree cel@(Cell bp ep nod) ts) =
191 case nod of
192 NodeGroup -> xmlify inh ts
193 ----------------------
194 NodePara ->
195 case inh_para inh of
196 [] -> xmlify inh ts
197 para:inh_para ->
198 Seq.singleton $
199 para (() <$ cel) $
200 xmlify inh{inh_para} ts
201 ----------------------
202 NodeHeader hdr ->
203 case hdr of
204 HeaderSection{} ->
205 let (attrs,body) = partitionAttrs ts in
206 let inh' = inh
207 { inh_para = xmlTitle : List.repeat xmlPara
208 , inh_figure = True
209 } in
210 Seq.singleton $
211 element "section" $
212 xmlAttrs (attrs `defaultAttr` Cell ep ep ("id",getAttrId body)) <>
213 xmlify inh' body
214 HeaderColon kn _wh ->
215 let (attrs,body) = partitionAttrs ts in
216 let inh' = inh { inh_para =
217 case kn of
218 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
219 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
220 "serie" -> List.repeat xmlName
221 "author" -> List.repeat xmlName
222 "editor" -> List.repeat xmlName
223 "org" -> List.repeat xmlName
224 _ -> []
225 } in
226 case () of
227 _ | kn == "about" -> xmlAbout inh' cel {-attrs-} body
228 _ | inh_figure inh && not (kn`List.elem`elems) ->
229 Seq.singleton $
230 element "figure" $
231 xmlAttrs (setAttr (Cell ep ep ("type",kn)) attrs) <>
232 case toList body of
233 [Tree0{}] -> xmlify inh'{inh_para = List.repeat xmlPara} body
234 _ -> xmlify inh'{inh_para = xmlTitle : List.repeat xmlPara} body
235 _ ->
236 Seq.singleton $
237 element (xmlLocalName kn) $
238 xmlAttrs attrs <>
239 xmlify inh' ts
240 HeaderGreat n _wh -> Seq.singleton $ element (xmlLocalName n) $ xmlify inh ts
241 HeaderEqual n _wh -> Seq.singleton $ element (xmlLocalName n) $ xmlify inh ts
242 HeaderBar n _wh -> Seq.singleton $ element (xmlLocalName n) $ xmlify inh ts
243 HeaderDot _n -> Seq.singleton $ element "li" $ xmlify inh ts
244 HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts
245 HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $
246 -- debug1_ ("TS", ts) $
247 -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $
248 Plain.plainDocument ts
249 -- Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing
250 {-
251 TreeSeq.mapAlsoNode
252 (cell1 . unCell)
253 (\_k -> fmap $
254 TreeSeq.mapAlsoNode
255 (cell1 . unCell)
256 (\_k' -> cell1 . unCell)) <$> ts
257 -}
258 HeaderBrackets ident ->
259 let inh' = inh{inh_figure = False} in
260 let (attrs,body) = partitionAttrs ts in
261 Seq.singleton $
262 element "reference" $
263 xmlAttrs (setAttr (Cell ep ep ("id",ident)) attrs) <>
264 xmlify inh'{inh_para = xmlTitle : xmlTitle : List.repeat xmlPara} body
265 HeaderDotSlash p ->
266 Seq.singleton $
267 element "include" $
268 xmlAttrs [cell ("href",TL.pack $ FP.replaceExtension p "dtc")] <>
269 xmlify inh ts
270 ----------------------
271 NodePair pair ->
272 case pair of
273 PairBracket | to <- Plain.plainDocument ts
274 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
275 Seq.singleton $
276 element "rref" $
277 xmlAttrs [cell ("to",to)]
278 PairStar -> Seq.singleton $ element "b" $ xmlify inh ts
279 PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts
280 PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts
281 PairFrenchquote ->
282 Seq.singleton $
283 element "q" $
284 xmlify inh ts
285 {-
286 case ts of
287 (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) ->
288 case Seq.viewr ls of
289 m :> Tree0 (Cell br er (TokenPlain r)) ->
290 xmlify inh $
291 Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)))
292 <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))))
293 _ ->
294 xmlify inh $
295 Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls
296 (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) ->
297 xmlify inh $
298 rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r)))
299 _ -> xmlify inh ts
300 -}
301 PairHash ->
302 Seq.singleton $
303 element "ref" $
304 xmlAttrs [cell ("to",Plain.plainDocument ts)]
305 PairElem name attrs ->
306 Seq.singleton $
307 element (xmlLocalName name) $
308 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
309 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
310 xmlify inh ts
311 _ ->
312 let (open, close) = pairBorders pair ts in
313 Seq.singleton (Tree0 $ Cell bp bp $ XmlText open) `unionXml`
314 xmlify inh ts `unionXml`
315 Seq.singleton (Tree0 $ Cell ep ep $ XmlText close)
316 ----------------------
317 NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
318 ----------------------
319 NodeToken tok ->
320 case tok of
321 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c
322 TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t
323 TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)]
324 TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)]
325 ----------------------
326 NodeLower n as ->
327 Seq.singleton $
328 element "artwork" $
329 xmlify inh ts
330 where
331 cell :: a -> Cell a
332 cell = Cell bp ep
333 element :: XmlName -> XMLs -> XML
334 element n = tree (cell $ XmlElem n)
335
336 -- | TODO: add more mimetypes
337 mimetype :: TL.Text -> Maybe TL.Text
338 mimetype "txt" = Just "text/plain"
339 mimetype "plain" = Just "text/plain"
340 mimetype "hs" = Just "text/x-haskell"
341 mimetype "sh" = Just "text/x-shellscript"
342 mimetype "shell" = Just "text/x-shellscript"
343 mimetype "shellscript" = Just "text/x-shellscript"
344 mimetype _ = Nothing
345
346 xmlPara :: Cell a -> XMLs -> XML
347 xmlPara c = tree (XmlElem "para" <$ c)
348
349 xmlTitle :: Cell a -> XMLs -> XML
350 xmlTitle c = tree (XmlElem "title" <$ c)
351
352 xmlName :: Cell a -> XMLs -> XML
353 -- xmlName bp (toList -> [unTree -> unCell -> XmlText t]) = Tree0 $ Cell bp bp $ XmlAttr "name" t
354 xmlName c = tree (XmlElem "name" <$ c)
355
356 xmlAbout ::
357 Inh ->
358 Cell Node ->
359 -- Seq (Cell (XmlName, Text)) ->
360 Roots -> XMLs
361 xmlAbout inh nod body =
362 xmlify inh $ Tree nod $
363 case Seq.viewl (inh_titles inh) of
364 (unTree -> cell_begin -> bt) :< _ ->
365 ((<$> inh_titles inh) $ \title ->
366 Tree (Cell bt bt $ NodeHeader $ HeaderColon "title" "") $
367 Seq.singleton $ title)
368 <> body
369 _ -> body
370
371 xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs
372 xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>)
373
374 -- | Unify two 'XMLs', merging border 'XmlText's if any.
375 unionXml :: XMLs -> XMLs -> XMLs
376 unionXml x y =
377 case (Seq.viewr x, Seq.viewl y) of
378 (xs :> x0, y0 :< ys) ->
379 case (x0,y0) of
380 ( Tree0 (Cell bx ex (XmlText tx))
381 , Tree0 (Cell by ey (XmlText ty)) ) ->
382 xs `unionXml`
383 Seq.singleton (Tree0 $ (XmlText <$>) $ Cell bx ex tx <> Cell by ey ty) `unionXml`
384 ys
385 _ -> x <> y
386 (Seq.EmptyR, _) -> y
387 (_, Seq.EmptyL) -> x
388
389 unionsXml :: Foldable f => f XMLs -> XMLs
390 unionsXml = foldl' unionXml mempty
391
392 partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots)
393 partitionAttrs ts = (attrs,cs)
394 where
395 (as,cs) = (`Seq.partition` ts) $ \case
396 Tree (unCell -> NodeHeader (HeaderEqual (TL.null -> False) _wh)) _cs -> True
397 _ -> False
398 attrs = attr <$> as
399 attr = \case
400 Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a ->
401 Cell bp ep (xmlLocalName n, v)
402 where
403 v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
404 _ -> undefined
405
406 spanlHeaderBar :: Name -> Roots -> (Roots, Roots)
407 spanlHeaderBar name = first unHeaderBar . debug0 "spanBar" . spanBar
408 -- FIXME: use unTree
409 where
410 unHeaderBar :: Roots -> Roots
411 unHeaderBar = (=<<) $ \case
412 Tree (unCell -> NodeHeader HeaderBar{}) ts -> ts
413 ts -> return ts
414 spanBar =
415 Seq.spanl $ \case
416 Tree (unCell -> NodeHeader (HeaderBar n _)) _ | n == name -> True
417 Tree (unCell -> NodeHeader (HeaderGreat n _)) _ | n == name -> True
418 _ -> False
419
420 spanlHeaderGreat :: Name -> Roots -> (Roots, Roots)
421 spanlHeaderGreat name = first unHeaderGreat . debug0 "spanGreat" . spanGreat
422 -- FIXME: use unTree
423 where
424 unHeaderGreat :: Roots -> Roots
425 unHeaderGreat = (=<<) $ \case
426 Tree (unCell -> NodeHeader HeaderGreat{}) ts -> ts
427 ts -> return ts
428 spanGreat =
429 Seq.spanl $ \case
430 Tree (unCell -> NodeHeader (HeaderGreat n _)) _ | n == name -> True
431 _ -> False
432
433 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
434 spanlItems liHeader =
435 Seq.spanl $ \(unTree -> unCell -> nod) ->
436 case nod of
437 NodeHeader (HeaderColon "li" _wh) -> True
438 NodeHeader hdr -> liHeader hdr
439 NodePair (PairElem "li" _as) -> True
440 _ -> False
441
442 spanlHeaderColon :: Name -> Roots -> (Roots, Roots)
443 spanlHeaderColon name =
444 Seq.spanl $ \case
445 Tree (unCell -> NodeHeader (HeaderBar n _)) _ -> n == name
446 Tree (unCell -> NodeHeader (HeaderGreat n _)) _ -> n == name
447 _ -> False
448
449 spanlBrackets :: Roots -> (Roots, Roots)
450 spanlBrackets =
451 Seq.spanl $ \case
452 Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True
453 _ -> False
454
455 spanlNodeToken :: Roots -> (Roots, Roots)
456 spanlNodeToken =
457 Seq.spanl (\case
458 Tree (unCell -> NodeToken{}) _ -> True
459 _ -> False)
460
461 getAttrId :: Roots -> TL.Text
462 getAttrId ts =
463 case Seq.viewl ts of
464 EmptyL -> ""
465 t :< _ -> Plain.plainDocument $ Seq.singleton t
466
467 setAttr ::
468 Cell (XmlName, TL.Text) ->
469 Seq (Cell (XmlName, TL.Text)) ->
470 Seq (Cell (XmlName, TL.Text))
471 setAttr a@(unCell -> (k, _v)) as =
472 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
473 Just idx -> Seq.update idx a as
474 Nothing -> a <| as
475
476 defaultAttr ::
477 Seq (Cell (XmlName, TL.Text)) ->
478 Cell (XmlName, TL.Text) ->
479 Seq (Cell (XmlName, TL.Text))
480 defaultAttr as a@(unCell -> (k, _v)) =
481 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
482 Just _idx -> as
483 Nothing -> a <| as
484
485 elems :: Set TL.Text
486 elems =
487 [ "about"
488 , "abstract"
489 , "address"
490 , "alias"
491 , "annotation"
492 , "area"
493 , "artwork"
494 , "aside"
495 , "audio"
496 , "author"
497 , "authors"
498 , "bcp14"
499 , "br"
500 , "call"
501 , "city"
502 , "code"
503 , "comment"
504 , "comments"
505 , "country"
506 , "date"
507 , "dd"
508 , "define"
509 , "del"
510 , "div"
511 , "dl"
512 , "document"
513 , "dt"
514 , "editor"
515 , "email"
516 , "embed"
517 , "eref"
518 , "fax"
519 , "feed"
520 , "feedback"
521 , "figure"
522 , "filter"
523 , "format"
524 , "from"
525 , "h"
526 , "hi"
527 , "html5"
528 , "i"
529 , "index"
530 , "iref"
531 , "keyword"
532 , "li"
533 , "link"
534 , "name"
535 , "note"
536 , "ol"
537 , "organization"
538 , "para"
539 , "postamble"
540 , "preamble"
541 , "q"
542 , "ref"
543 , "reference"
544 , "references"
545 , "region"
546 , "rref"
547 , "sc"
548 , "section"
549 , "serie"
550 , "source"
551 , "span"
552 , "street"
553 , "style"
554 , "sub"
555 , "sup"
556 , "table"
557 , "tbody"
558 , "td"
559 , "tel"
560 , "tfoot"
561 , "title"
562 , "th"
563 , "thead"
564 , "toc"
565 , "tof"
566 , "tr"
567 , "tt"
568 , "u"
569 , "ul"
570 , "uri"
571 , "version"
572 , "video"
573 , "workgroup"
574 , "xml"
575 , "zipcode"
576 ]