]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Add NodePara and NodeGroup.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Default.Class (Default(..))
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
21 import Data.Set (Set)
22 import Data.Text (Text)
23 import Data.TreeSeq.Strict (Tree(..))
24 import GHC.Exts (toList)
25 import Prelude (undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text as Text
30 import qualified Data.Text.Lazy as TL
31 import qualified Control.Monad.Trans.State as S
32 import qualified Language.TCT.Write.Plain as Plain
33 import qualified System.FilePath as FP
34
35 import Text.Blaze.XML ()
36 import Language.TCT hiding (Parser)
37 import Language.XML
38 import qualified Data.TreeSeq.Strict as TreeSeq
39
40 import Debug.Trace (trace)
41 import Text.Show (show)
42
43 xmlDocument :: TCTs -> XMLs
44 xmlDocument trees =
45 -- (`S.evalState` def) $
46 case Seq.viewl trees of
47 TreeN (unCell -> KeySection{}) vs :< ts ->
48 case spanlTokens vs of
49 (titles@(Seq.viewl -> (Seq.viewl -> (posTree -> bp) :< _) :< _), vs') ->
50 let vs'' =
51 case Seq.findIndexL
52 (\case
53 TreeN (unCell -> KeyColon "about" _) _ -> True
54 _ -> False) vs' of
55 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
56 Just{} -> vs' in
57 xmlify def
58 { inh_titles = titles
59 , inh_figure = True
60 , inh_tree0 = List.repeat xmlPara
61 } vs'' <>
62 xmlify def ts
63 _ -> xmlify def trees
64 _ -> xmlify def trees
65
66 {-
67 -- * Type 'Xmls'
68 type Xmls = S.State State XMLs
69 type Xml = S.State State XML
70 instance Semigroup Xmls where
71 (<>) = liftA2 (<>)
72 instance Monoid Xmls where
73 mempty = return mempty
74 mappend = (<>)
75
76 -- * Type 'State'
77 data State
78 = State
79 { state_pos :: Pos
80 }
81 instance Default State where
82 def = State
83 { state_pos = pos1
84 }
85 -}
86
87 -- * Type 'Inh'
88 data Inh
89 = Inh
90 { inh_figure :: Bool
91 , inh_tree0 :: [Pos -> XMLs -> XML]
92 , inh_titles :: Seq Tokens
93 }
94 instance Default Inh where
95 def = Inh
96 { inh_figure = False
97 , inh_tree0 = []
98 , inh_titles = mempty
99 }
100
101 -- * Class 'Xmlify'
102 class Xmlify a where
103 xmlify :: Inh -> a -> XMLs
104 instance Xmlify TCTs where
105 xmlify inh_orig = go inh_orig
106 where
107 go :: Inh -> TCTs -> XMLs
108 go inh trees =
109 case Seq.viewl trees of
110 TreeN (Cell bp ep (KeyBar n _)) _ :< _
111 | (body,ts) <- spanlBar n trees
112 , not (null body) ->
113 (<| go inh ts) $
114 TreeN (Cell bp ep "artwork") $
115 maybe id (\v -> (Tree0 (XmlAttr (Cell bp ep ("type", v))) <|)) (mimetype n) $
116 body >>= xmlify inh{inh_tree0=[]}
117
118 TreeN key@(unCell -> KeyColon n _) cs :< ts
119 | (cs',ts') <- spanlKeyColon n ts
120 , not (null cs') ->
121 go inh $ TreeN key (cs<>cs') <| ts'
122
123 TreeN (Cell bp ep KeyBrackets{}) _ :< _
124 | (rl,ts) <- spanlBrackets trees
125 , not (null rl) ->
126 (<| go inh ts) $
127 TreeN (Cell bp ep "references") $
128 rl >>= xmlify inh_orig
129
130 _ | (ul,ts) <- spanlItems (==KeyDash) trees
131 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
132 (<| go inh ts) $
133 TreeN (Cell bp ep "ul") $
134 ul >>= xmlify inh{inh_tree0=List.repeat xmlPara}
135
136 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
137 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
138 (<| go inh ts) $
139 TreeN (Cell bp ep "ol") $
140 ol >>= xmlify inh{inh_tree0=List.repeat xmlPara}
141
142 t@(Tree0 toks) :< ts ->
143 case inh_tree0 inh of
144 [] -> xmlify inh_orig t <> go inh ts
145 _ | isTokenElem toks -> xmlify inh_orig t <> go inh ts
146 tree0:inh_tree0 ->
147 (case Seq.viewl toks of
148 EmptyL -> id
149 (posTree -> bp) :< _ -> (tree0 bp (xmlify inh_orig t) <|)) $
150 go inh{inh_tree0} ts
151
152 t:<ts ->
153 xmlify inh_orig t <>
154 go inh ts
155
156 _ -> mempty
157 instance Xmlify TCT where
158 xmlify inh tr =
159 case tr of
160 TreeN (Cell bp ep KeySection{}) ts ->
161 let (attrs,body) = partitionAttributesChildren ts in
162 let inh' = inh
163 { inh_tree0 = xmlTitle : List.repeat xmlPara
164 , inh_figure = True
165 } in
166 Seq.singleton $
167 TreeN (Cell bp ep "section") $
168 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
169 xmlify inh' body
170
171 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
172 let (attrs,body) = partitionAttributesChildren ts in
173 let inh' = inh { inh_tree0 =
174 case kn of
175 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
176 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
177 "serie" -> List.repeat xmlName
178 "author" -> List.repeat xmlName
179 "editor" -> List.repeat xmlName
180 "org" -> List.repeat xmlName
181 _ -> []
182 } in
183 case () of
184 _ | kn == "about" -> xmlAbout inh' key attrs body
185
186 _ | inh_figure inh && not (kn`List.elem`elems) ->
187 Seq.singleton $
188 TreeN (Cell bp ep "figure") $
189 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
190 case toList body of
191 [Tree0{}] -> xmlify inh'{inh_tree0 = List.repeat xmlPara} body
192 _ -> xmlify inh'{inh_tree0 = xmlTitle : List.repeat xmlPara} body
193
194 _ -> Seq.singleton $ xmlKey inh' key attrs body
195
196 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
197
198 Tree0 ts -> xmlify inh ts
199 instance Xmlify Tokens where
200 xmlify inh toks =
201 case Seq.viewl toks of
202 TreeN (Cell bp _ep PairParen) paren
203 :< (Seq.viewl -> TreeN (Cell bb eb PairBracket) bracket
204 :< ts) ->
205 (<| xmlify inh ts) $
206 case bracket of
207 (toList -> [Tree0 (Cell bl el (TokenLink lnk))]) ->
208 TreeN (Cell bp eb "eref") $
209 xmlAttrs [Cell bl el ("to",lnk)] <>
210 xmlify inh paren
211 _ ->
212 TreeN (Cell bp eb "rref") $
213 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Plain.text def bracket)] <>
214 xmlify inh paren
215 t :< ts -> xmlify inh t `unionXml` xmlify inh ts
216 Seq.EmptyL -> mempty
217 instance Xmlify Token where
218 xmlify inh (TreeN (Cell bp ep p) ts) =
219 case p of
220 PairBracket | to <- Plain.text def ts
221 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
222 Seq.singleton .
223 TreeN (cell "rref") $
224 xmlAttrs [cell ("to",TL.toStrict to)]
225 PairStar -> Seq.singleton . TreeN (cell "b") $ xmlify inh ts
226 PairSlash -> Seq.singleton . TreeN (cell "i") $ xmlify inh ts
227 PairBackquote -> Seq.singleton . TreeN (cell "code") $ xmlify inh ts
228 PairFrenchquote ->
229 Seq.singleton .
230 TreeN (cell "q") $
231 xmlify inh ts
232 {-
233 case ts of
234 (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) ->
235 case Seq.viewr ls of
236 m :> Tree0 (Cell br er (TokenPlain r)) ->
237 xmlify inh $
238 Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)))
239 <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))))
240 _ ->
241 xmlify inh $
242 Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls
243 (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) ->
244 xmlify inh $
245 rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r)))
246 _ -> xmlify inh ts
247 -}
248 PairHash ->
249 Seq.singleton .
250 TreeN (cell "ref") $
251 xmlAttrs [cell ("to",TL.toStrict $ Plain.text def ts)]
252 PairElem name attrs ->
253 Seq.singleton .
254 TreeN (cell $ xmlLocalName name) $
255 xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) ->
256 cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <>
257 xmlify inh ts
258 _ ->
259 let (o,c) = pairBorders p ts in
260 Seq.singleton (Tree0 $ XmlPhrases $ phrasify $ Cell bp bp o) `unionXml`
261 xmlify inh ts `unionXml`
262 Seq.singleton (Tree0 $ XmlPhrases $ phrasify $ Cell ep ep c)
263 where
264 cell :: a -> Cell a
265 cell = Cell bp ep
266 xmlify inh (Tree0 tok) = do
267 case tok of
268 TokenPhrases ps -> xmlify inh $ ps
269 TokenEscape c -> Seq.singleton $ Tree0 $ XmlPhrases $ phrasify c
270 TokenRaw t -> Seq.singleton $ Tree0 $ XmlText t
271 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
272 TokenLink (Cell bp ep lnk) ->
273 xmlify (Cell bp ep ()) <>
274 Seq.singleton (TreeN (cell "eref") $ xmlAttrs [cell ("to",lnk)])
275 where
276 cell :: a -> Cell a
277 cell = Cell bp ep
278 {-
279 whites :: Pos -> Pos -> Seq XmlText
280 whites (Pos bLine bCol) (Pos eLine eCol) =
281 case bLine`compate`eLine of
282 LT -> verts <>
283 EQ -> horiz bCol eCol
284 GT ->
285 -}
286 instance Xmlify (Cell Phrase) where
287 xmlify _inh t = Seq.singleton $ Tree0 $ XmlPhrases $ Seq.singleton t
288
289 mimetype :: Text -> Maybe Text
290 mimetype "hs" = Just "text/x-haskell"
291 mimetype "sh" = Just "text/x-shellscript"
292 mimetype "shell" = Just "text/x-shellscript"
293 mimetype "shellscript" = Just "text/x-shellscript"
294 mimetype _ = Nothing
295
296 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
297 xmlPhantom n bp = TreeN (Cell bp bp n)
298 xmlPara :: Pos -> XMLs -> XML
299 xmlPara = xmlPhantom "para"
300 xmlTitle :: Pos -> XMLs -> XML
301 xmlTitle = xmlPhantom "title"
302 xmlName :: Pos -> XMLs -> XML
303 xmlName bp (toList -> [Tree0 (XmlText (unCell -> t))]) = Tree0 (XmlAttr $ Cell bp bp ("name", t))
304 xmlName bp ts = xmlPhantom "name" bp ts
305
306 xmlAbout ::
307 Inh ->
308 Cell Key -> Seq (Cell (XmlName, Text)) ->
309 TCTs -> XMLs
310 xmlAbout inh key attrs body =
311 Seq.singleton $
312 xmlKey inh key attrs $
313 case Seq.viewl (inh_titles inh) of
314 (Seq.viewl -> (posTree -> bt) :< _) :< _ ->
315 ((<$> inh_titles inh) $ \title ->
316 TreeN (Cell bt bt $ KeyColon "title" "") $
317 Seq.singleton $ Tree0 title)
318 <> body
319 _ -> body
320
321 xmlKey :: Inh -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
322 xmlKey inh (Cell bp ep key) attrs ts =
323 case key of
324 KeyColon n _wh -> d_key n
325 KeyGreat n _wh -> d_key n
326 KeyEqual n _wh -> d_key n
327 KeyBar n _wh -> d_key n
328 KeyDot _n -> TreeN (cell "li") $ xmlify inh ts
329 KeyDash -> TreeN (cell "li") $ xmlify inh ts
330 KeyDashDash -> Tree0 $ XmlComment $ cell $ TL.toStrict com
331 where
332 com :: TL.Text
333 com =
334 trace ("TS: "<>show ts) $
335 trace ("RS: "<>show (S.evalState (Plain.rackUpLeft ts) Nothing)) $
336 Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing
337 {-
338 TreeSeq.mapAlsoNode
339 (cell1 . unCell)
340 (\_k -> fmap $
341 TreeSeq.mapAlsoNode
342 (cell1 . unCell)
343 (\_k' -> cell1 . unCell)) <$> ts
344 -}
345 KeyLower n as -> TreeN (cell "artwork") $ xmlify inh ts
346 KeyBrackets ident ->
347 let inh' = inh{inh_figure = False} in
348 let (attrs',body) = partitionAttributesChildren ts in
349 TreeN (cell "reference") $
350 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) (attrs<>attrs')) <>
351 xmlify inh'{inh_tree0 = xmlTitle : xmlTitle : List.repeat xmlPara} body
352 KeyDotSlash p ->
353 TreeN (cell "include") $
354 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
355 xmlify inh ts
356 where
357 cell :: a -> Cell a
358 cell = Cell bp ep
359 d_key :: Text -> XML
360 d_key n =
361 TreeN (cell $ xmlLocalName n) $
362 xmlAttrs attrs <>
363 xmlify inh ts
364
365 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
366 xmlAttrs = (Tree0 . XmlAttr <$>)
367
368 -- | Unify two 'XMLs', merging border 'XmlText's if any.
369 unionXml :: XMLs -> XMLs -> XMLs
370 unionXml x y =
371 case (Seq.viewr x, Seq.viewl y) of
372 (xs :> x0, y0 :< ys) ->
373 case (x0,y0) of
374 ( Tree0 (XmlPhrases tx)
375 , Tree0 (XmlPhrases ty) ) ->
376 xs `unionXml`
377 Seq.singleton (Tree0 $ XmlPhrases $ tx <> ty) `unionXml`
378 ys
379 ( Tree0 (XmlText tx)
380 , Tree0 (XmlText ty) ) ->
381 xs `unionXml`
382 Seq.singleton (Tree0 $ XmlText $ tx <> ty) `unionXml`
383 ys
384 _ -> x <> y
385 (Seq.EmptyR, _) -> y
386 (_, Seq.EmptyL) -> x
387
388 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
389 spanlBar name = first unKeyBar . spanBar
390 where
391 unKeyBar :: TCTs -> TCTs
392 unKeyBar = (=<<) $ \case
393 TreeN (unCell -> KeyBar{}) ts -> ts
394 _ -> mempty
395 spanBar =
396 Seq.spanl $ \case
397 TreeN (unCell -> KeyBar n _) _ | n == name -> True
398 _ -> False
399
400 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
401 spanlItems liKey ts =
402 let (lis, ts') = spanLIs ts in
403 foldl' accumLIs (mempty,ts') lis
404 where
405 spanLIs :: TCTs -> (TCTs, TCTs)
406 spanLIs = Seq.spanl $ \case
407 TreeN (unCell -> liKey -> True) _ -> True
408 Tree0 toks ->
409 (`any` toks) $ \case
410 TreeN (unCell -> PairElem "li" _) _ -> True
411 _ -> False
412 {-
413 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
414 [unCell -> TokenPair (PairElem "li" _) _] -> True
415 _ -> False
416 -}
417 _ -> False
418 accumLIs :: (TCTs,TCTs) -> TCT -> (TCTs,TCTs)
419 accumLIs acc@(oks,kos) t =
420 case t of
421 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
422 Tree0 toks ->
423 let (ok,ko) =
424 (`Seq.spanl` toks) $ \case
425 TreeN (unCell -> PairElem "li" _) _ -> True
426 -- (isTokenWhite -> True) -> True -- TODO: see if this is still useful
427 _ -> False in
428 ( if null ok then oks else oks|>Tree0 (rmTokenWhite ok)
429 , if null ko then kos else Tree0 ko<|kos )
430 _ -> acc
431 {-
432 rmTokenWhite :: Tokens -> Tokens
433 rmTokenWhite =
434 Seq.filter $ \case
435 (isTokenWhite -> False) -> True
436 _ -> True
437 -}
438
439 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
440 spanlKeyColon name =
441 Seq.spanl $ \case
442 TreeN (unCell -> KeyBar n _) _ -> n == name
443 TreeN (unCell -> KeyGreat n _) _ -> n == name
444 _ -> False
445
446 spanlBrackets :: TCTs -> (TCTs, TCTs)
447 spanlBrackets =
448 Seq.spanl $ \case
449 TreeN (unCell -> KeyBrackets{}) _ -> True
450 _ -> False
451
452 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
453 spanlTokens =
454 first ((\case
455 Tree0 ts -> ts
456 _ -> undefined) <$>) .
457 Seq.spanl (\case
458 Tree0{} -> True
459 _ -> False)
460
461 getAttrId :: TCTs -> Text
462 getAttrId ts =
463 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
464 Just (Tree0 toks) -> TL.toStrict $ Plain.text def toks
465 _ -> ""
466
467 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
468 setXmlAttr a@(unCell -> (k, _v)) as =
469 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
470 Just idx -> Seq.update idx a as
471 Nothing -> a <| as
472
473 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
474 defXmlAttr a@(unCell -> (k, _v)) as =
475 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
476 Just _idx -> as
477 Nothing -> a <| as
478
479 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
480 partitionAttributesChildren ts = (attrs,cs)
481 where
482 (as,cs) = (`Seq.partition` ts) $ \case
483 TreeN (unCell -> KeyEqual{}) _cs -> True
484 _ -> False
485 attrs = attr <$> as
486 attr = \case
487 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
488 Cell bp ep (xmlLocalName n, v)
489 where
490 v = TL.toStrict $
491 Plain.text def{Plain.state_escape = False} $
492 TreeSeq.mapAlsoNode
493 (cell1 . unCell)
494 (\_k -> fmap $
495 TreeSeq.mapAlsoNode
496 (cell1 . unCell)
497 (\_k' -> cell1 . unCell)) <$> a
498 _ -> undefined
499
500 elems :: Set Text
501 elems =
502 [ "about"
503 , "abstract"
504 , "address"
505 , "alias"
506 , "annotation"
507 , "area"
508 , "artwork"
509 , "aside"
510 , "audio"
511 , "author"
512 , "authors"
513 , "bcp14"
514 , "br"
515 , "call"
516 , "city"
517 , "code"
518 , "comment"
519 , "comments"
520 , "country"
521 , "date"
522 , "dd"
523 , "define"
524 , "del"
525 , "div"
526 , "dl"
527 , "document"
528 , "dt"
529 , "editor"
530 , "email"
531 , "embed"
532 , "eref"
533 , "fax"
534 , "feed"
535 , "feedback"
536 , "figure"
537 , "filter"
538 , "format"
539 , "from"
540 , "h"
541 , "hi"
542 , "html5"
543 , "i"
544 , "index"
545 , "iref"
546 , "keyword"
547 , "li"
548 , "link"
549 , "name"
550 , "note"
551 , "ol"
552 , "organization"
553 , "para"
554 , "postamble"
555 , "preamble"
556 , "q"
557 , "ref"
558 , "reference"
559 , "references"
560 , "region"
561 , "rref"
562 , "sc"
563 , "section"
564 , "serie"
565 , "source"
566 , "span"
567 , "street"
568 , "style"
569 , "sub"
570 , "sup"
571 , "table"
572 , "tbody"
573 , "td"
574 , "tel"
575 , "tfoot"
576 , "title"
577 , "th"
578 , "thead"
579 , "toc"
580 , "tof"
581 , "tr"
582 , "tt"
583 , "u"
584 , "ul"
585 , "uri"
586 , "version"
587 , "video"
588 , "workgroup"
589 , "xml"
590 , "zipcode"
591 ]