]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Prepare anchorify for references.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Eq (Eq(..))
13 import Data.Foldable (null, foldl', any)
14 import Data.Function (($), (.), id)
15 import Data.Functor (Functor(..), (<$>))
16 import Data.Maybe (Maybe(..), maybe)
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
20 import Data.Set (Set)
21 import Data.Text (Text)
22 import Data.TreeSeq.Strict (Tree(..))
23 import GHC.Exts (toList)
24 import Prelude (undefined)
25 import qualified Data.Char as Char
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text as Text
29 import qualified Data.Text.Lazy as TL
30 import qualified Language.TCT.Write.Text as Write
31 import qualified System.FilePath as FP
32
33 import Text.Blaze.XML ()
34 import Language.TCT hiding (Parser)
35 import Language.XML
36 import qualified Data.TreeSeq.Strict as TreeSeq
37
38 -- * Type 'InhXml'
39 data InhXml
40 = InhXml
41 { inhXml_figure :: Bool
42 , inhXml_tree0 :: [Pos -> XMLs -> XML]
43 , inhXml_titles :: Seq Tokens
44 }
45 inhXml :: InhXml
46 inhXml = InhXml
47 { inhXml_figure = False
48 , inhXml_tree0 = []
49 , inhXml_titles = mempty
50 }
51
52 mimetype :: Text -> Maybe Text
53 mimetype "hs" = Just "text/x-haskell"
54 mimetype "sh" = Just "text/x-shellscript"
55 mimetype "shell" = Just "text/x-shellscript"
56 mimetype "shellscript" = Just "text/x-shellscript"
57 mimetype _ = Nothing
58
59 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
60 xmlPhantom n bp = TreeN (Cell bp bp n)
61 xmlPara :: Pos -> XMLs -> XML
62 xmlPara = xmlPhantom "para"
63 xmlTitle :: Pos -> XMLs -> XML
64 xmlTitle = xmlPhantom "title"
65 xmlName :: Pos -> XMLs -> XML
66 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
67 xmlName bp ts = xmlPhantom "name" bp ts
68
69 xmlDocument :: TCTs -> XMLs
70 xmlDocument trees =
71 case Seq.viewl trees of
72 TreeN (unCell -> KeySection{}) vs :< ts ->
73 case spanlTokens vs of
74 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
75 let vs'' =
76 case Seq.findIndexL
77 (\case
78 TreeN (unCell -> KeyColon "about" _) _ -> True
79 _ -> False) vs' of
80 Just{} -> vs'
81 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
82 in
83 xmlTCTs inhXml
84 { inhXml_titles = titles
85 , inhXml_figure = True
86 , inhXml_tree0 = List.repeat xmlPara
87 } vs'' <>
88 xmlTCTs inhXml ts
89 _ -> xmlTCTs inhXml trees
90 _ -> xmlTCTs inhXml trees
91
92 xmlTCTs :: InhXml -> TCTs -> XMLs
93 xmlTCTs inh_orig = go inh_orig
94 where
95 go :: InhXml -> TCTs -> XMLs
96 go inh trees =
97 case Seq.viewl trees of
98 TreeN (Cell bp ep (KeyBar n _)) _ :< _
99 | (body,ts) <- spanlBar n trees
100 , not (null body) ->
101 (<| go inh ts) $
102 TreeN (Cell bp ep "artwork") $
103 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
104 body >>= xmlTCT inh{inhXml_tree0=[]}
105
106 TreeN key@(unCell -> KeyColon n _) cs :< ts
107 | (cs',ts') <- spanlKeyColon n ts
108 , not (null cs') ->
109 go inh $ TreeN key (cs<>cs') <| ts'
110
111 TreeN (Cell bp ep KeyBrackets{}) _ :< _
112 | (rl,ts) <- spanlBrackets trees
113 , not (null rl) ->
114 (<| go inh ts) $
115 TreeN (Cell bp ep "rl") $
116 rl >>= xmlTCT inh_orig
117
118 _ | (ul,ts) <- spanlItems (==KeyDash) trees
119 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
120 (<| go inh ts) $
121 TreeN (Cell bp ep "ul") $
122 ul >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
123
124 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
125 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
126 (<| go inh ts) $
127 TreeN (Cell bp ep "ol") $
128 ol >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
129
130 t@(Tree0 toks) :< ts | isTokenElem toks ->
131 xmlTCT inh_orig t <>
132 go inh ts
133
134 t@(Tree0 toks) :< ts ->
135 case inhXml_tree0 inh of
136 [] ->
137 xmlTCT inh_orig t <>
138 go inh{inhXml_tree0=[]} ts
139 x:xs ->
140 case Seq.viewl toks of
141 EmptyL -> go inh{inhXml_tree0=xs} ts
142 Cell bp _ep _ :< _ ->
143 (<| go inh{inhXml_tree0=xs} ts) $
144 x bp $
145 xmlTCT inh_orig t
146
147 t:<ts ->
148 xmlTCT inh_orig t <>
149 go inh ts
150
151 _ -> mempty
152
153 xmlTCT :: InhXml -> TCT -> XMLs
154 xmlTCT inh tr =
155 case tr of
156 TreeN (Cell bp ep KeySection{}) ts ->
157 let (attrs,body) = partitionAttributesChildren ts in
158 let inh' = inh
159 { inhXml_tree0 = xmlTitle : List.repeat xmlPara
160 , inhXml_figure = True
161 } in
162 Seq.singleton $
163 TreeN (Cell bp ep "section") $
164 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
165 xmlTCTs inh' body
166
167 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
168 let (attrs,body) = partitionAttributesChildren ts in
169 let inh' = inh { inhXml_tree0 =
170 case kn of
171 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
172 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
173 "author" -> List.repeat xmlName
174 _ -> []
175 } in
176 case () of
177 _ | kn == "about" -> xmlAbout inh' key attrs body
178
179 _ | inhXml_figure inh && not (kn`List.elem`elems) ->
180 Seq.singleton $
181 TreeN (Cell bp ep "figure") $
182 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
183 case toList body of
184 [Tree0{}] -> xmlTCTs inh'{inhXml_tree0 = List.repeat xmlPara} body
185 _ -> xmlTCTs inh'{inhXml_tree0 = xmlTitle : List.repeat xmlPara} body
186
187 _ -> Seq.singleton $ xmlKey inh' key attrs body
188
189 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
190
191 Tree0 ts -> xmlTokens ts
192
193 xmlAbout ::
194 InhXml ->
195 Cell Key -> Seq (Cell (XmlName, Text)) ->
196 TCTs -> XMLs
197 xmlAbout inh key attrs body =
198 Seq.singleton $
199 xmlKey inh key attrs $
200 case Seq.viewl (inhXml_titles inh) of
201 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
202 ((<$> inhXml_titles inh) $ \title ->
203 TreeN (Cell bt bt $ KeyColon "title" "") $
204 Seq.singleton $ Tree0 title)
205 <> body
206 _ -> body
207
208 xmlKey :: InhXml -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
209 xmlKey inh (Cell bp ep key) attrs ts =
210 case key of
211 KeyColon n _wh -> d_key n
212 KeyGreat n _wh -> d_key n
213 KeyEqual n _wh -> d_key n
214 KeyBar n _wh -> d_key n
215 KeyDot _n -> TreeN (cell "li") $ xmlTCTs inh ts
216 KeyDash -> TreeN (cell "li") $ xmlTCTs inh ts
217 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
218 where
219 com :: TL.Text
220 com =
221 Write.text Write.config_text $
222 TreeSeq.mapAlsoKey
223 (cell1 . unCell)
224 (\_path -> fmap $ cell1 . unCell) <$> ts
225 KeyLower n as -> TreeN (cell "artwork") $ xmlTCTs inh ts
226 KeyBrackets ident ->
227 let inh' = inh{inhXml_figure = False} in
228 let (attrs',body) = partitionAttributesChildren ts in
229 TreeN (cell "reference") $
230 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) (attrs<>attrs')) <>
231 xmlTCTs inh' body
232 KeyDotSlash p ->
233 TreeN (cell "include") $
234 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
235 xmlTCTs inh ts
236 where
237 cell :: a -> Cell a
238 cell = Cell bp ep
239 d_key :: Text -> XML
240 d_key n =
241 TreeN (cell $ xmlLocalName n) $
242 xmlAttrs attrs <>
243 xmlTCTs inh ts
244
245 xmlTokens :: Tokens -> XMLs
246 xmlTokens tok = goTokens tok
247 where
248 go :: Cell Token -> XMLs
249 go (Cell bp ep tk) =
250 case tk of
251 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
252 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
253 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
254 TokenLink lnk -> Seq.singleton $
255 TreeN (cell "eref") $
256 xmlAttrs [cell ("to",lnk)] |>
257 Tree0 (cell $ XmlText lnk)
258 TokenPair PairBracket ts | to <- Write.textTokens ts
259 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
260 Seq.singleton $
261 TreeN (cell "rref") $
262 xmlAttrs [cell ("to",TL.toStrict to)]
263 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts
264 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts
265 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts
266 TokenPair PairFrenchquote toks@ts ->
267 Seq.singleton $
268 TreeN (cell "q") $
269 case ts of
270 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
271 case Seq.viewr ls of
272 m :> Cell br er (TokenPlain r) ->
273 goTokens $
274 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
275 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
276 _ ->
277 goTokens $
278 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
279 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
280 goTokens $
281 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
282 _ -> goTokens toks
283 TokenPair PairHash to ->
284 Seq.singleton $
285 TreeN (cell "ref") $
286 xmlAttrs [cell ("to",TL.toStrict $ Write.textTokens to)]
287 TokenPair (PairElem name attrs) ts ->
288 Seq.singleton $
289 TreeN (cell $ xmlLocalName name) $
290 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
291 goTokens ts
292 TokenPair p ts ->
293 let (o,c) = pairBorders p ts in
294 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
295 goTokens ts `unionXml`
296 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
297 where
298 cell :: a -> Cell a
299 cell = Cell bp ep
300
301 goTokens :: Tokens -> XMLs
302 goTokens toks =
303 case Seq.viewl toks of
304 Cell bp _ep (TokenPair PairParen paren)
305 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
306 :< ts) ->
307 (<| goTokens ts) $
308 case bracket of
309 (toList -> [Cell bl el (TokenLink lnk)]) ->
310 TreeN (Cell bp eb "eref") $
311 xmlAttrs [Cell bl el ("to",lnk)] <>
312 goTokens paren
313 _ ->
314 TreeN (Cell bp eb "rref") $
315 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Write.textTokens bracket)] <>
316 goTokens paren
317 t :< ts -> go t `unionXml` goTokens ts
318 Seq.EmptyL -> mempty
319
320 -- | Unify two 'XMLs', merging border 'XmlText's if any.
321 unionXml :: XMLs -> XMLs -> XMLs
322 unionXml x y =
323 case (Seq.viewr x, Seq.viewl y) of
324 (xs :> x0, y0 :< ys) ->
325 case (x0,y0) of
326 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
327 xs `unionXml`
328 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
329 ys
330 _ -> x <> y
331 (Seq.EmptyR, _) -> y
332 (_, Seq.EmptyL) -> x
333
334
335 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
336 spanlBar name = first unKeyBar . spanBar
337 where
338 unKeyBar :: TCTs -> TCTs
339 unKeyBar = (=<<) $ \case
340 TreeN (unCell -> KeyBar{}) ts -> ts
341 _ -> mempty
342 spanBar =
343 Seq.spanl $ \case
344 TreeN (unCell -> KeyBar n _) _ | n == name -> True
345 _ -> False
346
347 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
348 spanlItems liKey ts =
349 let (lis, ts') = spanLIs ts in
350 foldl' accumLIs (mempty,ts') lis
351 where
352 spanLIs = Seq.spanl $ \case
353 TreeN (unCell -> liKey -> True) _ -> True
354 Tree0 toks ->
355 (`any` toks) $ \case
356 (unCell -> TokenPair (PairElem "li" _) _) -> True
357 _ -> False
358 {-
359 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
360 [unCell -> TokenPair (PairElem "li" _) _] -> True
361 _ -> False
362 -}
363 _ -> False
364 accumLIs acc@(oks,kos) t =
365 case t of
366 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
367 Tree0 toks ->
368 let (ok,ko) =
369 (`Seq.spanl` toks) $ \tok ->
370 case unCell tok of
371 TokenPair (PairElem "li" _) _ -> True
372 TokenPlain txt -> Char.isSpace`Text.all`txt
373 _ -> False in
374 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
375 , if null ko then kos else Tree0 ko<|kos )
376 _ -> acc
377 rmTokenPlain =
378 Seq.filter $ \case
379 (unCell -> TokenPlain{}) -> False
380 _ -> True
381
382 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
383 spanlKeyColon name =
384 Seq.spanl $ \case
385 TreeN (unCell -> KeyBar n _) _ -> n == name
386 TreeN (unCell -> KeyGreat n _) _ -> n == name
387 _ -> False
388
389 spanlBrackets :: TCTs -> (TCTs, TCTs)
390 spanlBrackets =
391 Seq.spanl $ \case
392 TreeN (unCell -> KeyBrackets{}) _ -> True
393 _ -> False
394
395 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
396 spanlTokens =
397 first ((\case
398 Tree0 ts -> ts
399 _ -> undefined) <$>) .
400 Seq.spanl (\case
401 Tree0{} -> True
402 _ -> False)
403
404 getAttrId :: TCTs -> Text
405 getAttrId ts =
406 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
407 Just (Tree0 toks) -> TL.toStrict $ Write.textTokens toks
408 _ -> ""
409
410 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
411 setXmlAttr a@(unCell -> (k, _v)) as =
412 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
413 Just idx -> Seq.update idx a as
414 Nothing -> a <| as
415
416 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
417 defXmlAttr a@(unCell -> (k, _v)) as =
418 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
419 Just _idx -> as
420 Nothing -> a <| as
421
422 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
423 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
424
425 {-
426 xmlAttr :: XmlAttrs -> (Text,Attr) -> XmlAttrs
427 xmlAttr acc (_,Attr{..}) = Map.insert (xmlLocalName attr_name) attr_value acc
428 -- TODO: conflict
429 -}
430
431 {-
432 d_Attributes :: XmlAttrs -> DTC -> DTC
433 d_Attributes = flip $ Map.foldrWithKey $ \n v ->
434 B.AddCustomAttribute (B.Text n) (B.Text v)
435 -}
436
437 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
438 partitionAttributesChildren ts = (attrs,cs)
439 where
440 (as,cs) = (`Seq.partition` ts) $ \case
441 TreeN (unCell -> KeyEqual{}) _cs -> True
442 _ -> False
443 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
444 attr = \case
445 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
446 Cell bp ep (xmlLocalName n, v)
447 where
448 v = TL.toStrict $
449 Write.text Write.config_text{Write.config_text_escape = False} $
450 TreeSeq.mapAlsoKey (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
451 _ -> undefined
452
453 elems :: Set Text
454 elems =
455 [ "about"
456 , "abstract"
457 , "address"
458 , "alias"
459 , "annotation"
460 , "area"
461 , "artwork"
462 , "aside"
463 , "audio"
464 , "author"
465 , "authors"
466 , "bcp14"
467 , "br"
468 , "call"
469 , "city"
470 , "code"
471 , "comment"
472 , "comments"
473 , "country"
474 , "date"
475 , "dd"
476 , "define"
477 , "del"
478 , "div"
479 , "dl"
480 , "document"
481 , "dt"
482 , "editor"
483 , "email"
484 , "embed"
485 , "eref"
486 , "fax"
487 , "feed"
488 , "feedback"
489 , "figure"
490 , "filter"
491 , "format"
492 , "from"
493 , "h"
494 , "hi"
495 , "html5"
496 , "i"
497 , "index"
498 , "iref"
499 , "keyword"
500 , "li"
501 , "link"
502 , "name"
503 , "note"
504 , "ol"
505 , "organization"
506 , "para"
507 , "postamble"
508 , "preamble"
509 , "q"
510 , "ref"
511 , "reference"
512 , "region"
513 , "rl"
514 , "rref"
515 , "sc"
516 , "section"
517 , "serie"
518 , "source"
519 , "span"
520 , "street"
521 , "style"
522 , "sub"
523 , "sup"
524 , "table"
525 , "tbody"
526 , "td"
527 , "tel"
528 , "tfoot"
529 , "title"
530 , "th"
531 , "thead"
532 , "toc"
533 , "tof"
534 , "tr"
535 , "tt"
536 , "u"
537 , "ul"
538 , "uri"
539 , "version"
540 , "video"
541 , "workgroup"
542 , "xml"
543 , "zipcode"
544 ]