]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Fix Reference.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Default.Class (Default(..))
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
21 import Data.Set (Set)
22 import Data.Text (Text)
23 import Data.TreeSeq.Strict (Tree(..))
24 import GHC.Exts (toList)
25 import Prelude (undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text as Text
30 import qualified Data.Text.Lazy as TL
31 import qualified Language.TCT.Write.Plain as Plain
32 import qualified System.FilePath as FP
33
34 import Text.Blaze.XML ()
35 import Language.TCT hiding (Parser)
36 import Language.XML
37 import qualified Data.TreeSeq.Strict as TreeSeq
38
39 -- * Type 'Inh'
40 data Inh
41 = Inh
42 { inh_figure :: Bool
43 , inh_tree0 :: [Pos -> XMLs -> XML]
44 , inh_titles :: Seq Tokens
45 }
46 instance Default Inh where
47 def = Inh
48 { inh_figure = False
49 , inh_tree0 = []
50 , inh_titles = mempty
51 }
52
53 mimetype :: Text -> Maybe Text
54 mimetype "hs" = Just "text/x-haskell"
55 mimetype "sh" = Just "text/x-shellscript"
56 mimetype "shell" = Just "text/x-shellscript"
57 mimetype "shellscript" = Just "text/x-shellscript"
58 mimetype _ = Nothing
59
60 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
61 xmlPhantom n bp = TreeN (Cell bp bp n)
62 xmlPara :: Pos -> XMLs -> XML
63 xmlPara = xmlPhantom "para"
64 xmlTitle :: Pos -> XMLs -> XML
65 xmlTitle = xmlPhantom "title"
66 xmlName :: Pos -> XMLs -> XML
67 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
68 xmlName bp ts = xmlPhantom "name" bp ts
69
70 xmlDocument :: TCTs -> XMLs
71 xmlDocument trees =
72 case Seq.viewl trees of
73 TreeN (unCell -> KeySection{}) vs :< ts ->
74 case spanlTokens vs of
75 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
76 let vs'' =
77 case Seq.findIndexL
78 (\case
79 TreeN (unCell -> KeyColon "about" _) _ -> True
80 _ -> False) vs' of
81 Just{} -> vs'
82 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
83 in
84 xmlTCTs def
85 { inh_titles = titles
86 , inh_figure = True
87 , inh_tree0 = List.repeat xmlPara
88 } vs'' <>
89 xmlTCTs def ts
90 _ -> xmlTCTs def trees
91 _ -> xmlTCTs def trees
92
93 xmlTCTs :: Inh -> TCTs -> XMLs
94 xmlTCTs inh_orig = go inh_orig
95 where
96 go :: Inh -> TCTs -> XMLs
97 go inh trees =
98 case Seq.viewl trees of
99 TreeN (Cell bp ep (KeyBar n _)) _ :< _
100 | (body,ts) <- spanlBar n trees
101 , not (null body) ->
102 (<| go inh ts) $
103 TreeN (Cell bp ep "artwork") $
104 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
105 body >>= xmlTCT inh{inh_tree0=[]}
106
107 TreeN key@(unCell -> KeyColon n _) cs :< ts
108 | (cs',ts') <- spanlKeyColon n ts
109 , not (null cs') ->
110 go inh $ TreeN key (cs<>cs') <| ts'
111
112 TreeN (Cell bp ep KeyBrackets{}) _ :< _
113 | (rl,ts) <- spanlBrackets trees
114 , not (null rl) ->
115 (<| go inh ts) $
116 TreeN (Cell bp ep "references") $
117 rl >>= xmlTCT inh_orig
118
119 _ | (ul,ts) <- spanlItems (==KeyDash) trees
120 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
121 (<| go inh ts) $
122 TreeN (Cell bp ep "ul") $
123 ul >>= xmlTCT inh{inh_tree0=List.repeat xmlPara}
124
125 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
126 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
127 (<| go inh ts) $
128 TreeN (Cell bp ep "ol") $
129 ol >>= xmlTCT inh{inh_tree0=List.repeat xmlPara}
130
131 t@(Tree0 toks) :< ts | isTokenElem toks ->
132 xmlTCT inh_orig t <>
133 go inh ts
134
135 t@(Tree0 toks) :< ts ->
136 case inh_tree0 inh of
137 [] ->
138 xmlTCT inh_orig t <>
139 go inh{inh_tree0=[]} ts
140 x:xs ->
141 case Seq.viewl toks of
142 EmptyL -> go inh{inh_tree0=xs} ts
143 Cell bp _ep _ :< _ ->
144 (<| go inh{inh_tree0=xs} ts) $
145 x bp $
146 xmlTCT inh_orig t
147
148 t:<ts ->
149 xmlTCT inh_orig t <>
150 go inh ts
151
152 _ -> mempty
153
154 xmlTCT :: Inh -> TCT -> XMLs
155 xmlTCT inh tr =
156 case tr of
157 TreeN (Cell bp ep KeySection{}) ts ->
158 let (attrs,body) = partitionAttributesChildren ts in
159 let inh' = inh
160 { inh_tree0 = xmlTitle : List.repeat xmlPara
161 , inh_figure = True
162 } in
163 Seq.singleton $
164 TreeN (Cell bp ep "section") $
165 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
166 xmlTCTs inh' body
167
168 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
169 let (attrs,body) = partitionAttributesChildren ts in
170 let inh' = inh { inh_tree0 =
171 case kn of
172 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
173 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
174 "serie" -> List.repeat xmlName
175 "author" -> List.repeat xmlName
176 "editor" -> List.repeat xmlName
177 "org" -> List.repeat xmlName
178 _ -> []
179 } in
180 case () of
181 _ | kn == "about" -> xmlAbout inh' key attrs body
182
183 _ | inh_figure inh && not (kn`List.elem`elems) ->
184 Seq.singleton $
185 TreeN (Cell bp ep "figure") $
186 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
187 case toList body of
188 [Tree0{}] -> xmlTCTs inh'{inh_tree0 = List.repeat xmlPara} body
189 _ -> xmlTCTs inh'{inh_tree0 = xmlTitle : List.repeat xmlPara} body
190
191 _ -> Seq.singleton $ xmlKey inh' key attrs body
192
193 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
194
195 Tree0 ts -> xmlTokens ts
196
197 xmlAbout ::
198 Inh ->
199 Cell Key -> Seq (Cell (XmlName, Text)) ->
200 TCTs -> XMLs
201 xmlAbout inh key attrs body =
202 Seq.singleton $
203 xmlKey inh key attrs $
204 case Seq.viewl (inh_titles inh) of
205 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
206 ((<$> inh_titles inh) $ \title ->
207 TreeN (Cell bt bt $ KeyColon "title" "") $
208 Seq.singleton $ Tree0 title)
209 <> body
210 _ -> body
211
212 xmlKey :: Inh -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
213 xmlKey inh (Cell bp ep key) attrs ts =
214 case key of
215 KeyColon n _wh -> d_key n
216 KeyGreat n _wh -> d_key n
217 KeyEqual n _wh -> d_key n
218 KeyBar n _wh -> d_key n
219 KeyDot _n -> TreeN (cell "li") $ xmlTCTs inh ts
220 KeyDash -> TreeN (cell "li") $ xmlTCTs inh ts
221 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
222 where
223 com :: TL.Text
224 com =
225 Plain.text def $
226 TreeSeq.mapAlsoNode
227 (cell1 . unCell)
228 (\_path -> fmap $ cell1 . unCell) <$> ts
229 KeyLower n as -> TreeN (cell "artwork") $ xmlTCTs inh ts
230 KeyBrackets ident ->
231 let inh' = inh{inh_figure = False} in
232 let (attrs',body) = partitionAttributesChildren ts in
233 TreeN (cell "reference") $
234 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) (attrs<>attrs')) <>
235 xmlTCTs inh'{inh_tree0 = xmlTitle : xmlTitle : List.repeat xmlPara} body
236 KeyDotSlash p ->
237 TreeN (cell "include") $
238 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
239 xmlTCTs inh ts
240 where
241 cell :: a -> Cell a
242 cell = Cell bp ep
243 d_key :: Text -> XML
244 d_key n =
245 TreeN (cell $ xmlLocalName n) $
246 xmlAttrs attrs <>
247 xmlTCTs inh ts
248
249 xmlTokens :: Tokens -> XMLs
250 xmlTokens tok = goTokens tok
251 where
252 go :: Cell Token -> XMLs
253 go (Cell bp ep tk) =
254 case tk of
255 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
256 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
257 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
258 TokenLink lnk -> Seq.singleton $
259 TreeN (cell "eref") $
260 xmlAttrs [cell ("to",lnk)]
261 TokenPair PairBracket ts | to <- Plain.textify ts
262 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
263 Seq.singleton $
264 TreeN (cell "rref") $
265 xmlAttrs [cell ("to",TL.toStrict to)]
266 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts
267 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts
268 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts
269 TokenPair PairFrenchquote toks@ts ->
270 Seq.singleton $
271 TreeN (cell "q") $
272 case ts of
273 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
274 case Seq.viewr ls of
275 m :> Cell br er (TokenPlain r) ->
276 goTokens $
277 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
278 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
279 _ ->
280 goTokens $
281 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
282 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
283 goTokens $
284 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
285 _ -> goTokens toks
286 TokenPair PairHash to ->
287 Seq.singleton $
288 TreeN (cell "ref") $
289 xmlAttrs [cell ("to",TL.toStrict $ Plain.textify to)]
290 TokenPair (PairElem name attrs) ts ->
291 Seq.singleton $
292 TreeN (cell $ xmlLocalName name) $
293 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
294 goTokens ts
295 TokenPair p ts ->
296 let (o,c) = pairBorders p ts in
297 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
298 goTokens ts `unionXml`
299 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
300 where
301 cell :: a -> Cell a
302 cell = Cell bp ep
303
304 goTokens :: Tokens -> XMLs
305 goTokens toks =
306 case Seq.viewl toks of
307 Cell bp _ep (TokenPair PairParen paren)
308 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
309 :< ts) ->
310 (<| goTokens ts) $
311 case bracket of
312 (toList -> [Cell bl el (TokenLink lnk)]) ->
313 TreeN (Cell bp eb "eref") $
314 xmlAttrs [Cell bl el ("to",lnk)] <>
315 goTokens paren
316 _ ->
317 TreeN (Cell bp eb "rref") $
318 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Plain.textify bracket)] <>
319 goTokens paren
320 t :< ts -> go t `unionXml` goTokens ts
321 Seq.EmptyL -> mempty
322
323 -- | Unify two 'XMLs', merging border 'XmlText's if any.
324 unionXml :: XMLs -> XMLs -> XMLs
325 unionXml x y =
326 case (Seq.viewr x, Seq.viewl y) of
327 (xs :> x0, y0 :< ys) ->
328 case (x0,y0) of
329 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
330 xs `unionXml`
331 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
332 ys
333 _ -> x <> y
334 (Seq.EmptyR, _) -> y
335 (_, Seq.EmptyL) -> x
336
337
338 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
339 spanlBar name = first unKeyBar . spanBar
340 where
341 unKeyBar :: TCTs -> TCTs
342 unKeyBar = (=<<) $ \case
343 TreeN (unCell -> KeyBar{}) ts -> ts
344 _ -> mempty
345 spanBar =
346 Seq.spanl $ \case
347 TreeN (unCell -> KeyBar n _) _ | n == name -> True
348 _ -> False
349
350 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
351 spanlItems liKey ts =
352 let (lis, ts') = spanLIs ts in
353 foldl' accumLIs (mempty,ts') lis
354 where
355 spanLIs = Seq.spanl $ \case
356 TreeN (unCell -> liKey -> True) _ -> True
357 Tree0 toks ->
358 (`any` toks) $ \case
359 (unCell -> TokenPair (PairElem "li" _) _) -> True
360 _ -> False
361 {-
362 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
363 [unCell -> TokenPair (PairElem "li" _) _] -> True
364 _ -> False
365 -}
366 _ -> False
367 accumLIs acc@(oks,kos) t =
368 case t of
369 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
370 Tree0 toks ->
371 let (ok,ko) =
372 (`Seq.spanl` toks) $ \tok ->
373 case unCell tok of
374 TokenPair (PairElem "li" _) _ -> True
375 TokenPlain txt -> Char.isSpace`Text.all`txt
376 _ -> False in
377 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
378 , if null ko then kos else Tree0 ko<|kos )
379 _ -> acc
380 rmTokenPlain =
381 Seq.filter $ \case
382 (unCell -> TokenPlain{}) -> False
383 _ -> True
384
385 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
386 spanlKeyColon name =
387 Seq.spanl $ \case
388 TreeN (unCell -> KeyBar n _) _ -> n == name
389 TreeN (unCell -> KeyGreat n _) _ -> n == name
390 _ -> False
391
392 spanlBrackets :: TCTs -> (TCTs, TCTs)
393 spanlBrackets =
394 Seq.spanl $ \case
395 TreeN (unCell -> KeyBrackets{}) _ -> True
396 _ -> False
397
398 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
399 spanlTokens =
400 first ((\case
401 Tree0 ts -> ts
402 _ -> undefined) <$>) .
403 Seq.spanl (\case
404 Tree0{} -> True
405 _ -> False)
406
407 getAttrId :: TCTs -> Text
408 getAttrId ts =
409 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
410 Just (Tree0 toks) -> TL.toStrict $ Plain.textify toks
411 _ -> ""
412
413 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
414 setXmlAttr a@(unCell -> (k, _v)) as =
415 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
416 Just idx -> Seq.update idx a as
417 Nothing -> a <| as
418
419 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
420 defXmlAttr a@(unCell -> (k, _v)) as =
421 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
422 Just _idx -> as
423 Nothing -> a <| as
424
425 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
426 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
427
428 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
429 partitionAttributesChildren ts = (attrs,cs)
430 where
431 (as,cs) = (`Seq.partition` ts) $ \case
432 TreeN (unCell -> KeyEqual{}) _cs -> True
433 _ -> False
434 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
435 attr = \case
436 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
437 Cell bp ep (xmlLocalName n, v)
438 where
439 v = TL.toStrict $
440 Plain.text def{Plain.state_escape = False} $
441 TreeSeq.mapAlsoNode (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
442 _ -> undefined
443
444 elems :: Set Text
445 elems =
446 [ "about"
447 , "abstract"
448 , "address"
449 , "alias"
450 , "annotation"
451 , "area"
452 , "artwork"
453 , "aside"
454 , "audio"
455 , "author"
456 , "authors"
457 , "bcp14"
458 , "br"
459 , "call"
460 , "city"
461 , "code"
462 , "comment"
463 , "comments"
464 , "country"
465 , "date"
466 , "dd"
467 , "define"
468 , "del"
469 , "div"
470 , "dl"
471 , "document"
472 , "dt"
473 , "editor"
474 , "email"
475 , "embed"
476 , "eref"
477 , "fax"
478 , "feed"
479 , "feedback"
480 , "figure"
481 , "filter"
482 , "format"
483 , "from"
484 , "h"
485 , "hi"
486 , "html5"
487 , "i"
488 , "index"
489 , "iref"
490 , "keyword"
491 , "li"
492 , "link"
493 , "name"
494 , "note"
495 , "ol"
496 , "organization"
497 , "para"
498 , "postamble"
499 , "preamble"
500 , "q"
501 , "ref"
502 , "reference"
503 , "references"
504 , "region"
505 , "rref"
506 , "sc"
507 , "section"
508 , "serie"
509 , "source"
510 , "span"
511 , "street"
512 , "style"
513 , "sub"
514 , "sup"
515 , "table"
516 , "tbody"
517 , "td"
518 , "tel"
519 , "tfoot"
520 , "title"
521 , "th"
522 , "thead"
523 , "toc"
524 , "tof"
525 , "tr"
526 , "tt"
527 , "u"
528 , "ul"
529 , "uri"
530 , "version"
531 , "video"
532 , "workgroup"
533 , "xml"
534 , "zipcode"
535 ]