]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Cosmetic changes.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Default.Class (Default(..))
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
21 import Data.Set (Set)
22 import Data.Text (Text)
23 import Data.TreeSeq.Strict (Tree(..))
24 import GHC.Exts (toList)
25 import Prelude (undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text as Text
30 import qualified Data.Text.Lazy as TL
31 import qualified Language.TCT.Write.Plain as Plain
32 import qualified System.FilePath as FP
33
34 import Text.Blaze.XML ()
35 import Language.TCT hiding (Parser)
36 import Language.XML
37 import qualified Data.TreeSeq.Strict as TreeSeq
38
39 -- * Type 'Inh'
40 data Inh
41 = Inh
42 { inh_figure :: Bool
43 , inh_tree0 :: [Pos -> XMLs -> XML]
44 , inh_titles :: Seq Tokens
45 }
46 instance Default Inh where
47 def = Inh
48 { inh_figure = False
49 , inh_tree0 = []
50 , inh_titles = mempty
51 }
52
53 -- * Class 'Xmlify'
54 class Xmlify a where
55 xmlify :: Inh -> a -> XMLs
56 instance Xmlify TCTs where
57 xmlify inh_orig = go inh_orig
58 where
59 go :: Inh -> TCTs -> XMLs
60 go inh trees =
61 case Seq.viewl trees of
62 TreeN (Cell bp ep (KeyBar n _)) _ :< _
63 | (body,ts) <- spanlBar n trees
64 , not (null body) ->
65 (<| go inh ts) $
66 TreeN (Cell bp ep "artwork") $
67 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
68 body >>= xmlify inh{inh_tree0=[]}
69
70 TreeN key@(unCell -> KeyColon n _) cs :< ts
71 | (cs',ts') <- spanlKeyColon n ts
72 , not (null cs') ->
73 go inh $ TreeN key (cs<>cs') <| ts'
74
75 TreeN (Cell bp ep KeyBrackets{}) _ :< _
76 | (rl,ts) <- spanlBrackets trees
77 , not (null rl) ->
78 (<| go inh ts) $
79 TreeN (Cell bp ep "references") $
80 rl >>= xmlify inh_orig
81
82 _ | (ul,ts) <- spanlItems (==KeyDash) trees
83 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
84 (<| go inh ts) $
85 TreeN (Cell bp ep "ul") $
86 ul >>= xmlify inh{inh_tree0=List.repeat xmlPara}
87
88 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
89 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
90 (<| go inh ts) $
91 TreeN (Cell bp ep "ol") $
92 ol >>= xmlify inh{inh_tree0=List.repeat xmlPara}
93
94 t@(Tree0 toks) :< ts ->
95 case inh_tree0 inh of
96 [] -> xmlify inh_orig t <> go inh ts
97 _ | isTokenElem toks -> xmlify inh_orig t <> go inh ts
98 tree0:inh_tree0 ->
99 (case Seq.viewl toks of
100 EmptyL -> id
101 Cell bp _ep _ :< _ -> (tree0 bp (xmlify inh_orig t) <|)) $
102 go inh{inh_tree0} ts
103
104 t:<ts ->
105 xmlify inh_orig t <>
106 go inh ts
107
108 _ -> mempty
109 instance Xmlify TCT where
110 xmlify inh tr =
111 case tr of
112 TreeN (Cell bp ep KeySection{}) ts ->
113 let (attrs,body) = partitionAttributesChildren ts in
114 let inh' = inh
115 { inh_tree0 = xmlTitle : List.repeat xmlPara
116 , inh_figure = True
117 } in
118 Seq.singleton $
119 TreeN (Cell bp ep "section") $
120 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
121 xmlify inh' body
122
123 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
124 let (attrs,body) = partitionAttributesChildren ts in
125 let inh' = inh { inh_tree0 =
126 case kn of
127 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
128 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
129 "serie" -> List.repeat xmlName
130 "author" -> List.repeat xmlName
131 "editor" -> List.repeat xmlName
132 "org" -> List.repeat xmlName
133 _ -> []
134 } in
135 case () of
136 _ | kn == "about" -> xmlAbout inh' key attrs body
137
138 _ | inh_figure inh && not (kn`List.elem`elems) ->
139 Seq.singleton $
140 TreeN (Cell bp ep "figure") $
141 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
142 case toList body of
143 [Tree0{}] -> xmlify inh'{inh_tree0 = List.repeat xmlPara} body
144 _ -> xmlify inh'{inh_tree0 = xmlTitle : List.repeat xmlPara} body
145
146 _ -> Seq.singleton $ xmlKey inh' key attrs body
147
148 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
149
150 Tree0 ts -> xmlify inh ts
151 instance Xmlify Tokens where
152 xmlify inh toks =
153 case Seq.viewl toks of
154 Cell bp _ep (TokenPair PairParen paren)
155 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
156 :< ts) ->
157 (<| xmlify inh ts) $
158 case bracket of
159 (toList -> [Cell bl el (TokenLink lnk)]) ->
160 TreeN (Cell bp eb "eref") $
161 xmlAttrs [Cell bl el ("to",lnk)] <>
162 xmlify inh paren
163 _ ->
164 TreeN (Cell bp eb "rref") $
165 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Plain.textify bracket)] <>
166 xmlify inh paren
167 t :< ts -> xmlify inh t `unionXml` xmlify inh ts
168 Seq.EmptyL -> mempty
169 instance Xmlify (Cell Token) where
170 xmlify inh (Cell bp ep tk) =
171 case tk of
172 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
173 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
174 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
175 TokenLink lnk -> Seq.singleton $
176 TreeN (cell "eref") $
177 xmlAttrs [cell ("to",lnk)]
178 TokenPair PairBracket ts | to <- Plain.textify ts
179 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
180 Seq.singleton $
181 TreeN (cell "rref") $
182 xmlAttrs [cell ("to",TL.toStrict to)]
183 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ xmlify inh ts
184 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ xmlify inh ts
185 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ xmlify inh ts
186 TokenPair PairFrenchquote toks@ts ->
187 Seq.singleton $
188 TreeN (cell "q") $
189 case ts of
190 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
191 case Seq.viewr ls of
192 m :> Cell br er (TokenPlain r) ->
193 xmlify inh $
194 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
195 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
196 _ ->
197 xmlify inh $
198 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
199 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
200 xmlify inh $
201 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
202 _ -> xmlify inh toks
203 TokenPair PairHash to ->
204 Seq.singleton $
205 TreeN (cell "ref") $
206 xmlAttrs [cell ("to",TL.toStrict $ Plain.textify to)]
207 TokenPair (PairElem name attrs) ts ->
208 Seq.singleton $
209 TreeN (cell $ xmlLocalName name) $
210 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) ->
211 cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
212 xmlify inh ts
213 TokenPair p ts ->
214 let (o,c) = pairBorders p ts in
215 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
216 xmlify inh ts `unionXml`
217 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
218 where
219 cell :: a -> Cell a
220 cell = Cell bp ep
221
222 mimetype :: Text -> Maybe Text
223 mimetype "hs" = Just "text/x-haskell"
224 mimetype "sh" = Just "text/x-shellscript"
225 mimetype "shell" = Just "text/x-shellscript"
226 mimetype "shellscript" = Just "text/x-shellscript"
227 mimetype _ = Nothing
228
229 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
230 xmlPhantom n bp = TreeN (Cell bp bp n)
231 xmlPara :: Pos -> XMLs -> XML
232 xmlPara = xmlPhantom "para"
233 xmlTitle :: Pos -> XMLs -> XML
234 xmlTitle = xmlPhantom "title"
235 xmlName :: Pos -> XMLs -> XML
236 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
237 xmlName bp ts = xmlPhantom "name" bp ts
238
239 xmlDocument :: TCTs -> XMLs
240 xmlDocument trees =
241 case Seq.viewl trees of
242 TreeN (unCell -> KeySection{}) vs :< ts ->
243 case spanlTokens vs of
244 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
245 let vs'' =
246 case Seq.findIndexL
247 (\case
248 TreeN (unCell -> KeyColon "about" _) _ -> True
249 _ -> False) vs' of
250 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
251 Just{} -> vs' in
252 xmlify def
253 { inh_titles = titles
254 , inh_figure = True
255 , inh_tree0 = List.repeat xmlPara
256 } vs'' <>
257 xmlify def ts
258 _ -> xmlify def trees
259 _ -> xmlify def trees
260
261 xmlAbout ::
262 Inh ->
263 Cell Key -> Seq (Cell (XmlName, Text)) ->
264 TCTs -> XMLs
265 xmlAbout inh key attrs body =
266 Seq.singleton $
267 xmlKey inh key attrs $
268 case Seq.viewl (inh_titles inh) of
269 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
270 ((<$> inh_titles inh) $ \title ->
271 TreeN (Cell bt bt $ KeyColon "title" "") $
272 Seq.singleton $ Tree0 title)
273 <> body
274 _ -> body
275
276 xmlKey :: Inh -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
277 xmlKey inh (Cell bp ep key) attrs ts =
278 case key of
279 KeyColon n _wh -> d_key n
280 KeyGreat n _wh -> d_key n
281 KeyEqual n _wh -> d_key n
282 KeyBar n _wh -> d_key n
283 KeyDot _n -> TreeN (cell "li") $ xmlify inh ts
284 KeyDash -> TreeN (cell "li") $ xmlify inh ts
285 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
286 where
287 com :: TL.Text
288 com =
289 Plain.text def $
290 TreeSeq.mapAlsoNode
291 (cell1 . unCell)
292 (\_path -> fmap $ cell1 . unCell) <$> ts
293 KeyLower n as -> TreeN (cell "artwork") $ xmlify inh ts
294 KeyBrackets ident ->
295 let inh' = inh{inh_figure = False} in
296 let (attrs',body) = partitionAttributesChildren ts in
297 TreeN (cell "reference") $
298 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) (attrs<>attrs')) <>
299 xmlify inh'{inh_tree0 = xmlTitle : xmlTitle : List.repeat xmlPara} body
300 KeyDotSlash p ->
301 TreeN (cell "include") $
302 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
303 xmlify inh ts
304 where
305 cell :: a -> Cell a
306 cell = Cell bp ep
307 d_key :: Text -> XML
308 d_key n =
309 TreeN (cell $ xmlLocalName n) $
310 xmlAttrs attrs <>
311 xmlify inh ts
312
313 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
314 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
315
316 -- | Unify two 'XMLs', merging border 'XmlText's if any.
317 unionXml :: XMLs -> XMLs -> XMLs
318 unionXml x y =
319 case (Seq.viewr x, Seq.viewl y) of
320 (xs :> x0, y0 :< ys) ->
321 case (x0,y0) of
322 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
323 xs `unionXml`
324 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
325 ys
326 _ -> x <> y
327 (Seq.EmptyR, _) -> y
328 (_, Seq.EmptyL) -> x
329
330 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
331 spanlBar name = first unKeyBar . spanBar
332 where
333 unKeyBar :: TCTs -> TCTs
334 unKeyBar = (=<<) $ \case
335 TreeN (unCell -> KeyBar{}) ts -> ts
336 _ -> mempty
337 spanBar =
338 Seq.spanl $ \case
339 TreeN (unCell -> KeyBar n _) _ | n == name -> True
340 _ -> False
341
342 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
343 spanlItems liKey ts =
344 let (lis, ts') = spanLIs ts in
345 foldl' accumLIs (mempty,ts') lis
346 where
347 spanLIs = Seq.spanl $ \case
348 TreeN (unCell -> liKey -> True) _ -> True
349 Tree0 toks ->
350 (`any` toks) $ \case
351 (unCell -> TokenPair (PairElem "li" _) _) -> True
352 _ -> False
353 {-
354 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
355 [unCell -> TokenPair (PairElem "li" _) _] -> True
356 _ -> False
357 -}
358 _ -> False
359 accumLIs acc@(oks,kos) t =
360 case t of
361 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
362 Tree0 toks ->
363 let (ok,ko) =
364 (`Seq.spanl` toks) $ \tok ->
365 case unCell tok of
366 TokenPair (PairElem "li" _) _ -> True
367 TokenPlain txt -> Char.isSpace`Text.all`txt
368 _ -> False in
369 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
370 , if null ko then kos else Tree0 ko<|kos )
371 _ -> acc
372 rmTokenPlain =
373 Seq.filter $ \case
374 (unCell -> TokenPlain{}) -> False
375 _ -> True
376
377 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
378 spanlKeyColon name =
379 Seq.spanl $ \case
380 TreeN (unCell -> KeyBar n _) _ -> n == name
381 TreeN (unCell -> KeyGreat n _) _ -> n == name
382 _ -> False
383
384 spanlBrackets :: TCTs -> (TCTs, TCTs)
385 spanlBrackets =
386 Seq.spanl $ \case
387 TreeN (unCell -> KeyBrackets{}) _ -> True
388 _ -> False
389
390 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
391 spanlTokens =
392 first ((\case
393 Tree0 ts -> ts
394 _ -> undefined) <$>) .
395 Seq.spanl (\case
396 Tree0{} -> True
397 _ -> False)
398
399 getAttrId :: TCTs -> Text
400 getAttrId ts =
401 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
402 Just (Tree0 toks) -> TL.toStrict $ Plain.textify toks
403 _ -> ""
404
405 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
406 setXmlAttr a@(unCell -> (k, _v)) as =
407 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
408 Just idx -> Seq.update idx a as
409 Nothing -> a <| as
410
411 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
412 defXmlAttr a@(unCell -> (k, _v)) as =
413 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
414 Just _idx -> as
415 Nothing -> a <| as
416
417 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
418 partitionAttributesChildren ts = (attrs,cs)
419 where
420 (as,cs) = (`Seq.partition` ts) $ \case
421 TreeN (unCell -> KeyEqual{}) _cs -> True
422 _ -> False
423 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
424 attr = \case
425 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
426 Cell bp ep (xmlLocalName n, v)
427 where
428 v = TL.toStrict $
429 Plain.text def{Plain.state_escape = False} $
430 TreeSeq.mapAlsoNode (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
431 _ -> undefined
432
433 elems :: Set Text
434 elems =
435 [ "about"
436 , "abstract"
437 , "address"
438 , "alias"
439 , "annotation"
440 , "area"
441 , "artwork"
442 , "aside"
443 , "audio"
444 , "author"
445 , "authors"
446 , "bcp14"
447 , "br"
448 , "call"
449 , "city"
450 , "code"
451 , "comment"
452 , "comments"
453 , "country"
454 , "date"
455 , "dd"
456 , "define"
457 , "del"
458 , "div"
459 , "dl"
460 , "document"
461 , "dt"
462 , "editor"
463 , "email"
464 , "embed"
465 , "eref"
466 , "fax"
467 , "feed"
468 , "feedback"
469 , "figure"
470 , "filter"
471 , "format"
472 , "from"
473 , "h"
474 , "hi"
475 , "html5"
476 , "i"
477 , "index"
478 , "iref"
479 , "keyword"
480 , "li"
481 , "link"
482 , "name"
483 , "note"
484 , "ol"
485 , "organization"
486 , "para"
487 , "postamble"
488 , "preamble"
489 , "q"
490 , "ref"
491 , "reference"
492 , "references"
493 , "region"
494 , "rref"
495 , "sc"
496 , "section"
497 , "serie"
498 , "source"
499 , "span"
500 , "street"
501 , "style"
502 , "sub"
503 , "sup"
504 , "table"
505 , "tbody"
506 , "td"
507 , "tel"
508 , "tfoot"
509 , "title"
510 , "th"
511 , "thead"
512 , "toc"
513 , "tof"
514 , "tr"
515 , "tt"
516 , "u"
517 , "ul"
518 , "uri"
519 , "version"
520 , "video"
521 , "workgroup"
522 , "xml"
523 , "zipcode"
524 ]