Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Factorize XML utilities.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.TCT.Write.XML where
9
10 import Control.Arrow (first)
11 import Control.Monad (Monad(..), (=<<))
12 import Data.Bool
13 import Data.Eq (Eq(..))
14 import Data.Foldable (null, foldl', any)
15 import Data.Function (($), (.), id)
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Maybe (Maybe(..), maybe)
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
21 import Data.Set (Set)
22 import Data.Text (Text)
23 import Data.TreeSeq.Strict (Tree(..))
24 import GHC.Exts (toList)
25 import Prelude (undefined)
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text as Text
30 import qualified Data.Text.Lazy as TL
31 import qualified Language.TCT.Write.Text as Write
32 import qualified System.FilePath as FP
33
34 import Text.Blaze.XML ()
35 import Language.TCT hiding (Parser)
36 import Language.XML
37 import qualified Data.TreeSeq.Strict as TreeSeq
38
-- * Type 'InhXml'

-- | Context inherited downwards while converting TCT trees to XML.
data InhXml
  = InhXml
  { inhXml_figure :: Bool
    -- ^ When 'True', 'xmlTCT' wraps a 'KeyColon' whose name is not
    -- a member of 'elems' into a @figure@ element.
  , inhXml_tree0 :: [Pos -> XMLs -> XML]
    -- ^ Wrappers that 'xmlTCTs' applies, one after the other, to the
    -- successive 'Tree0' it meets (those not recognised by 'isTokenElem');
    -- once the list is exhausted such 'Tree0' are emitted unwrapped.
  , inhXml_titles :: Seq Tokens
    -- ^ Title lines that 'xmlAbout' turns into @title@ keys
    -- placed before the body of the @about@ key.
  }
-- | Initial inherited context: no @figure@ wrapping,
-- no wrapper for 'Tree0', and no title.
inhXml :: InhXml
inhXml =
  InhXml
    { inhXml_figure = False
    , inhXml_tree0  = []
    , inhXml_titles = mempty
    }
52
-- | MIME type associated with a short language name, if it is known.
--
-- 'xmlTCTs' uses it to fill the @type@ attribute of an @artwork@ element
-- from the name carried by a 'KeyBar'.
mimetype :: Text -> Maybe Text
mimetype lang =
  case lang of
    "hs"          -> Just "text/x-haskell"
    "sh"          -> shell
    "shell"       -> shell
    "shellscript" -> shell
    _             -> Nothing
  where
    shell :: Maybe Text
    shell = Just "text/x-shellscript"
59
-- | Build an element named @name@ around @children@, whose cell
-- both begins and ends at the single given position.
xmlPhantom :: XmlName -> Pos -> XMLs -> XML
xmlPhantom name pos children = TreeN (Cell pos pos name) children
-- | Wrap the given nodes into a @para@ element located at the given position.
xmlPara :: Pos -> XMLs -> XML
xmlPara pos children = xmlPhantom "para" pos children
-- | Wrap the given nodes into a @title@ element located at the given position.
xmlTitle :: Pos -> XMLs -> XML
xmlTitle pos children = xmlPhantom "title" pos children
-- | Render a name: a single text node becomes a @name@ attribute,
-- anything else becomes a @name@ element wrapping the given nodes.
xmlName :: Pos -> XMLs -> XML
xmlName bp ts =
  case toList ts of
    [Tree0 (unCell -> XmlText t)] -> Tree0 (Cell bp bp $ XmlAttr "name" t)
    _ -> xmlPhantom "name" bp ts
69
-- | Convert a whole TCT document to XML.
--
-- When the document starts with a section whose children begin with
-- at least one non-empty line of tokens, those leading lines are taken
-- as the titles of the document: the remaining children of that section
-- are converted directly (no @section@ element is produced for it),
-- after an empty @about@ key has been prepended unless one is already
-- among them. The trees following that section are then converted
-- with the default 'inhXml'.
--
-- Any other document is converted as is with 'xmlTCTs' and 'inhXml'.
xmlDocument :: TCTs -> XMLs
xmlDocument trees =
  case Seq.viewl trees of
    TreeN (unCell -> KeySection{}) children :< siblings
      | (titles, rest) <- spanlTokens children
      , firstTitle :< _ <- Seq.viewl titles
      , Cell bp _ep _ :< _ <- Seq.viewl firstTitle ->
          let headInh = inhXml
                { inhXml_titles = titles
                , inhXml_figure = True
                , inhXml_tree0 = List.repeat xmlPara
                }
              -- Make sure there is an "about" key to receive the titles.
              headTrees
                | any isAbout rest = rest
                | otherwise = TreeN (Cell bp bp $ KeyColon "about" "") mempty <| rest
          in xmlTCTs headInh headTrees <> xmlTCTs inhXml siblings
    _ -> xmlTCTs inhXml trees
  where
    isAbout :: TCT -> Bool
    isAbout = \case
      TreeN (unCell -> KeyColon "about" _) _ -> True
      _ -> False
92
-- | Convert sibling TCT trees to XML, gathering adjacent trees
-- which belong to a same XML element.
--
-- The alternatives are tried in this order on the head of the sequence:
--
-- 1. a run of 'KeyBar' of the same name becomes one @artwork@ element
--    (with a @type@ attribute when 'mimetype' knows the name);
-- 2. a 'KeyColon' absorbs the children of the following trees
--    selected by 'spanlKeyColon';
-- 3. a run of 'KeyBrackets' becomes one @rl@ element;
-- 4. a run of items selected by 'spanlItems' becomes one @ul@ ('KeyDash')
--    or one @ol@ ('KeyDot') element;
-- 5. a 'Tree0' is converted, and wrapped with the next wrapper
--    of 'inhXml_tree0' unless 'isTokenElem' holds for its tokens;
-- 6. any other tree is converted on its own with 'xmlTCT'.
--
-- Note that single trees are converted with the context given
-- to 'xmlTCTs' itself, not with the one threaded along the siblings.
xmlTCTs :: InhXml -> TCTs -> XMLs
xmlTCTs inh_orig = go inh_orig
  where
    paras :: [Pos -> XMLs -> XML]
    paras = List.repeat xmlPara

    isKeyDot :: Key -> Bool
    isKeyDot = \case
      KeyDot{} -> True
      _ -> False

    go :: InhXml -> TCTs -> XMLs
    go inh trees =
      case Seq.viewl trees of
        TreeN (Cell bp ep (KeyBar name _)) _ :< _
          | (content, rest) <- spanlBar name trees
          , not (null content) ->
              let converted = content >>= xmlTCT inh{inhXml_tree0 = []}
                  children =
                    case mimetype name of
                      Just ty -> Tree0 (Cell bp ep (XmlAttr "type" ty)) <| converted
                      Nothing -> converted
              in TreeN (Cell bp ep "artwork") children <| go inh rest

        TreeN key@(unCell -> KeyColon name _) kids :< siblings
          | (continuation, rest) <- spanlKeyColon name siblings
          , not (null continuation) ->
              -- Merge, then convert again from the merged tree.
              go inh (TreeN key (kids <> continuation) <| rest)

        TreeN (Cell bp ep KeyBrackets{}) _ :< _
          | (refs, rest) <- spanlBrackets trees
          , not (null refs) ->
              TreeN (Cell bp ep "rl") (refs >>= xmlTCT inh_orig)
                <| go inh rest

        _ | (items, rest) <- spanlItems (== KeyDash) trees
          , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl items ->
              TreeN (Cell bp ep "ul") (items >>= xmlTCT inh{inhXml_tree0 = paras})
                <| go inh rest

        _ | (items, rest) <- spanlItems isKeyDot trees
          , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl items ->
              TreeN (Cell bp ep "ol") (items >>= xmlTCT inh{inhXml_tree0 = paras})
                <| go inh rest

        tree@(Tree0 toks) :< rest
          | isTokenElem toks ->
              xmlTCT inh_orig tree <> go inh rest
          | otherwise ->
              case inhXml_tree0 inh of
                [] ->
                  xmlTCT inh_orig tree <> go inh{inhXml_tree0 = []} rest
                wrap : wraps ->
                  case Seq.viewl toks of
                    -- An empty line still consumes a wrapper.
                    EmptyL -> go inh{inhXml_tree0 = wraps} rest
                    Cell bp _ep _ :< _ ->
                      wrap bp (xmlTCT inh_orig tree)
                        <| go inh{inhXml_tree0 = wraps} rest

        tree :< rest ->
          xmlTCT inh_orig tree <> go inh rest

        EmptyL -> mempty
153
-- | Convert a single TCT tree to XML.
--
-- * A 'KeySection' becomes a @section@ element, whose @id@ attribute
--   defaults to 'getAttrId' of its body, and whose first 'Tree0'
--   is wrapped as a @title@ and the following ones as @para@.
-- * A 'KeyColon' named @about@ is handled by 'xmlAbout'; otherwise,
--   when 'inhXml_figure' is set and its name is not in 'elems',
--   it becomes a @figure@ element whose @type@ attribute is that name;
--   otherwise it is rendered by 'xmlKey'.
-- * Any other key is rendered by 'xmlKey' without attributes.
-- * A 'Tree0' is rendered by 'xmlTokens'.
xmlTCT :: InhXml -> TCT -> XMLs
xmlTCT inh tr =
  case tr of
    TreeN (Cell bp ep KeySection{}) ts ->
      Seq.singleton $
        TreeN (Cell bp ep "section") $
          xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs)
            <> xmlTCTs inhSection body
      where
        (attrs, body) = partitionAttributesChildren ts
        inhSection = inh
          { inhXml_tree0 = xmlTitle : paras
          , inhXml_figure = True
          }

    TreeN key@(Cell bp ep (KeyColon kn _)) ts
      | kn == "about" ->
          xmlAbout inhKey key attrs body
      | inhXml_figure inh && not (kn `List.elem` elems) ->
          Seq.singleton $
            TreeN (Cell bp ep "figure") $
              xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs)
                <> xmlTCTs inhKey{inhXml_tree0 = figureWrappers} body
      | otherwise ->
          Seq.singleton $ xmlKey inhKey key attrs body
      where
        (attrs, body) = partitionAttributesChildren ts
        inhKey = inh
          { inhXml_tree0 =
              case kn of
                "about"     -> xmlTitle : xmlTitle : paras
                "reference" -> xmlTitle : xmlTitle : paras
                "author"    -> List.repeat xmlName
                _           -> []
          }
        -- A figure made of a single line has no title.
        figureWrappers =
          case toList body of
            [Tree0{}] -> paras
            _ -> xmlTitle : paras

    TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts

    Tree0 ts -> xmlTokens ts
  where
    paras :: [Pos -> XMLs -> XML]
    paras = List.repeat xmlPara
193
-- | Render an @about@ key with 'xmlKey', after having prepended to its body
-- one @title@ key per title of 'inhXml_titles'.
--
-- The @title@ keys are all located at the beginning of the first title;
-- when there is no title, or when the first one is empty,
-- the body is rendered unchanged.
xmlAbout ::
  InhXml ->
  Cell Key -> Seq (Cell (XmlName, Text)) ->
  TCTs -> XMLs
xmlAbout inh key attrs body =
  Seq.singleton $ xmlKey inh key attrs children
  where
    titles = inhXml_titles inh
    children =
      case Seq.viewl titles of
        firstTitle :< _
          | Cell bt _et _ :< _ <- Seq.viewl firstTitle ->
              fmap (titleKey bt) titles <> body
        _ -> body
    titleKey pos title =
      TreeN (Cell pos pos $ KeyColon "title" "") (Seq.singleton (Tree0 title))
208
-- | Render a key, with its attributes and children, as one XML node.
--
-- * 'KeyColon', 'KeyGreat', 'KeyEqual' and 'KeyBar' become an element
--   named after the key, carrying the given attributes.
-- * 'KeyDot' and 'KeyDash' become a @li@ element.
-- * 'KeyDashDash' becomes a comment holding the children written as text.
-- * 'KeyLower' becomes an @artwork@ element.
-- * 'KeyBrackets' becomes a @reference@ element whose @id@ attribute
--   is set to the identifier, and whose children are converted
--   with 'inhXml_figure' disabled.
-- * 'KeyDotSlash' becomes an @include@ element whose @href@ attribute
--   is the path with its extension replaced by @dtc@.
--
-- The given attributes are only used by the first and the 'KeyBrackets' cases.
-- There is no alternative for 'KeySection' here:
-- 'xmlTCT' handles sections before resorting to this function.
xmlKey :: InhXml -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
xmlKey inh (Cell bp ep key) attrs ts =
  case key of
    KeyColon n _wh -> named n
    KeyGreat n _wh -> named n
    KeyEqual n _wh -> named n
    KeyBar n _wh -> named n
    KeyDot _n -> listItem
    KeyDash -> listItem
    KeyDashDash -> Tree0 $ here $ XmlComment $ TL.toStrict commentText
    KeyLower _n _as -> TreeN (here "artwork") children
    KeyBrackets ident ->
      TreeN (here "reference") $
        xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) attrs)
          <> xmlTCTs inh{inhXml_figure = False} ts
    KeyDotSlash p ->
      TreeN (here "include") $
        xmlAttrs [here ("href", Text.pack $ FP.replaceExtension p "dtc")]
          <> children
  where
    -- Locate a value on the span of the key.
    here :: a -> Cell a
    here = Cell bp ep

    children :: XMLs
    children = xmlTCTs inh ts

    listItem :: XML
    listItem = TreeN (here "li") children

    named :: Text -> XML
    named n =
      TreeN (here $ xmlLocalName n) $
        xmlAttrs attrs <> children

    commentText :: TL.Text
    commentText =
      Write.text Write.config_text $
        TreeSeq.mapAlsoKey
          (cell1 . unCell)
          (\_path -> fmap $ cell1 . unCell) <$> ts
244
-- | Convert inline tokens to XML.
--
-- * Plain text and escaped characters become text nodes.
-- * A tag becomes a @ref@ element, a link an @eref@ element.
-- * A bracket pair whose text only holds alphanumeric or symbol characters
--   becomes an @rref@ element.
-- * Star, slash, backquote and french-quote pairs become
--   @b@, @i@, @code@ and @q@ elements.
-- * A hash pair becomes a @ref@ element, an element pair becomes
--   the element of that name with its attributes.
-- * Any other pair is kept as text: its borders (given by 'pairBorders')
--   are merged with the neighbouring text nodes using 'unionXml'.
-- * A paren pair immediately followed by a bracket pair becomes
--   a single @eref@ (when the brackets hold exactly one link)
--   or @rref@ element, whose content is the paren pair.
xmlTokens :: Tokens -> XMLs
xmlTokens tok = goTokens tok
  where
    go :: Cell Token -> XMLs
    go (Cell bp ep tk) =
      case tk of
        TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
        TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
        TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
        TokenLink lnk ->
          Seq.singleton $
            TreeN (cell "eref") $
              xmlAttrs [cell ("to",lnk)] |>
              Tree0 (cell $ XmlText lnk)
        -- When the guard fails, the brackets fall through
        -- to the generic 'TokenPair' alternative below.
        TokenPair PairBracket ts
          | to <- Write.textTokens ts
          , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
              Seq.singleton $
                TreeN (cell "rref") $
                  xmlAttrs [cell ("to",TL.toStrict to)]
        TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts
        TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts
        TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts
        TokenPair PairFrenchquote ts ->
          Seq.singleton $
            TreeN (cell "q") $
              goTokens (trimQuoted ts)
        TokenPair PairHash to ->
          Seq.singleton $
            TreeN (cell "ref") $
              xmlAttrs [cell ("to",TL.toStrict $ Write.textTokens to)]
        TokenPair (PairElem name attrs) ts ->
          Seq.singleton $
            TreeN (cell $ xmlLocalName name) $
              xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
              goTokens ts
        TokenPair p ts ->
          let (o,c) = pairBorders p ts in
          Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
          goTokens ts `unionXml`
          Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
      where
        cell :: a -> Cell a
        cell = Cell bp ep

    -- Strip the white spaces just inside french quotes: at the beginning
    -- of a leading plain token, and at the end of a trailing plain token.
    -- White spaces on the inner side of those tokens separate them from
    -- their neighbours and are therefore kept; a lone plain token
    -- is stripped on both sides.
    trimQuoted :: Tokens -> Tokens
    trimQuoted = trimEnd . trimStart
      where
        trimStart ts =
          case Seq.viewl ts of
            Cell b e (TokenPlain t) :< rest ->
              Cell b e (TokenPlain (Text.dropWhile Char.isSpace t)) <| rest
            _ -> ts
        trimEnd ts =
          case Seq.viewr ts of
            rest :> Cell b e (TokenPlain t) ->
              rest |> Cell b e (TokenPlain (Text.dropWhileEnd Char.isSpace t))
            _ -> ts

    goTokens :: Tokens -> XMLs
    goTokens toks =
      case Seq.viewl toks of
        Cell bp _ep (TokenPair PairParen paren)
          :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
          :< ts) ->
            (<| goTokens ts) $
              case bracket of
                (toList -> [Cell bl el (TokenLink lnk)]) ->
                  TreeN (Cell bp eb "eref") $
                    xmlAttrs [Cell bl el ("to",lnk)] <>
                    goTokens paren
                _ ->
                  TreeN (Cell bp eb "rref") $
                    xmlAttrs [Cell bb eb ("to",TL.toStrict $ Write.textTokens bracket)] <>
                    goTokens paren
        t :< ts -> go t `unionXml` goTokens ts
        Seq.EmptyL -> mempty
319
-- | Append two 'XMLs', fusing the last node of the first with
-- the first node of the second when both are 'XmlText's.
--
-- The fused text spans from the beginning of the former
-- to the end of the latter, and may in turn be fused with its neighbours.
unionXml :: XMLs -> XMLs -> XMLs
unionXml x y
  | xs :> Tree0 (Cell bx _ex (XmlText tx)) <- Seq.viewr x
  , Tree0 (Cell _by ey (XmlText ty)) :< ys <- Seq.viewl y
  , let merged = Tree0 $ Cell bx ey $ XmlText $ tx <> ty
  = (xs `unionXml` Seq.singleton merged) `unionXml` ys
  -- No text on both borders (or an empty side): plain concatenation.
  | otherwise = x <> y
333
334
-- | Split off the longest prefix of trees which are 'KeyBar's
-- of the given name.
--
-- The first component holds the concatenated children of those 'KeyBar's
-- (not the 'KeyBar's themselves), the second the remaining trees.
spanlBar :: Name -> TCTs -> (TCTs, TCTs)
spanlBar name trees = (bars >>= children, rest)
  where
    (bars, rest) = Seq.spanl isBar trees
    isBar = \case
      TreeN (unCell -> KeyBar n _) _ -> n == name
      _ -> False
    children = \case
      TreeN (unCell -> KeyBar{}) kids -> kids
      _ -> mempty
346
-- | Split off the leading list items from a sequence of trees.
--
-- The candidates are the longest prefix of trees which are either
-- a 'TreeN' whose key satisfies the given predicate,
-- or a 'Tree0' holding at least one @li@ element.
--
-- Each candidate 'TreeN' is kept as an item. For each candidate 'Tree0',
-- its leading tokens which are @li@ elements or blank plain texts are kept
-- as an item (with the plain texts removed), whereas its remaining tokens
-- are pushed in front of the trees returned in the second component.
spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
spanlItems liKey trees = foldl' step (mempty, rest) candidates
  where
    (candidates, rest) = Seq.spanl isCandidate trees

    isCandidate = \case
      TreeN k _ | liKey (unCell k) -> True
      Tree0 toks -> any isLiElem toks
      {-
      Tree0 toks ->
        case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
          [unCell -> TokenPair (PairElem "li" _) _] -> True
          _ -> False
      -}
      _ -> False

    step acc@(items, leftover) tree =
      case tree of
        TreeN k _ | liKey (unCell k) -> (items |> tree, leftover)
        Tree0 toks ->
          let (lead, trail) = Seq.spanl isLiOrBlank toks
              items'
                | null lead = items
                | otherwise = items |> Tree0 (Seq.filter (not . isPlain) lead)
              leftover'
                | null trail = leftover
                | otherwise = Tree0 trail <| leftover
          in (items', leftover')
        _ -> acc

    isLiElem tok =
      case unCell tok of
        TokenPair (PairElem "li" _) _ -> True
        _ -> False

    isBlank tok =
      case unCell tok of
        TokenPlain txt -> Text.all Char.isSpace txt
        _ -> False

    isPlain tok =
      case unCell tok of
        TokenPlain{} -> True
        _ -> False

    isLiOrBlank tok = isLiElem tok || isBlank tok
381
-- | Split off the longest prefix of trees which are 'KeyBar's
-- or 'KeyGreat's of the given name.
spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
spanlKeyColon name = Seq.spanl sameName
  where
    sameName tree =
      case tree of
        TreeN k _ ->
          case unCell k of
            KeyBar n _ -> n == name
            KeyGreat n _ -> n == name
            _ -> False
        _ -> False
388
-- | Split off the longest prefix of trees which are 'KeyBrackets'.
spanlBrackets :: TCTs -> (TCTs, TCTs)
spanlBrackets = Seq.spanl isBrackets
  where
    isBrackets (TreeN (unCell -> KeyBrackets{}) _) = True
    isBrackets _ = False
394
-- | Split off the longest prefix of 'Tree0's.
--
-- The first component holds the tokens of those 'Tree0's,
-- the second the remaining trees.
spanlTokens :: TCTs -> (Seq Tokens, TCTs)
spanlTokens trees = (leaves >>= tokensOf, rest)
  where
    (leaves, rest) = Seq.spanl isLeaf trees
    isLeaf = \case
      Tree0{} -> True
      _ -> False
    -- Total projection: 'leaves' only holds 'Tree0's,
    -- hence nothing is ever dropped by the second alternative.
    tokensOf = \case
      Tree0 toks -> Seq.singleton toks
      _ -> mempty
403
-- | Text of the first tree for which 'TreeSeq.isTree0' holds,
-- or the empty text when there is none (or when it is not a 'Tree0').
--
-- 'xmlTCT' uses it as the default @id@ of a section.
getAttrId :: TCTs -> Text
getAttrId ts =
  case Seq.viewl (Seq.dropWhileL (not . TreeSeq.isTree0) ts) of
    Tree0 toks :< _ -> TL.toStrict $ Write.textTokens toks
    _ -> ""
409
-- | Set an attribute: replace in place the first attribute of the same name
-- if there is one, otherwise put the given attribute in front.
setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
setXmlAttr a@(unCell -> (k, _v)) as =
  maybe (a <| as) (\idx -> Seq.update idx a as) $
    Seq.findIndexL (\(unCell -> (n, _)) -> n == k) as
415
-- | Give a default to an attribute: keep the attributes unchanged
-- if one of the same name already exists,
-- otherwise put the given attribute in front.
defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
defXmlAttr a@(unCell -> (k, _v)) as
  | any sameName as = as
  | otherwise = a <| as
  where
    sameName (unCell -> (n, _)) = n == k
421
-- | Turn located name/value pairs into 'XmlAttr' leaves,
-- each one keeping the location of its pair.
xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
xmlAttrs = fmap toAttr
  where
    toAttr (Cell bp ep (n, v)) = Tree0 $ Cell bp ep $ XmlAttr n v
424
425 {-
426 xmlAttr :: XmlAttrs -> (Text,Attr) -> XmlAttrs
427 xmlAttr acc (_,Attr{..}) = Map.insert (xmlLocalName attr_name) attr_value acc
428 -- TODO: conflict
429 -}
430
431 {-
432 d_Attributes :: XmlAttrs -> DTC -> DTC
433 d_Attributes = flip $ Map.foldrWithKey $ \n v ->
434 B.AddCustomAttribute (B.Text n) (B.Text v)
435 -}
436
-- | Separate the 'KeyEqual' trees, read as attributes,
-- from the other trees, returned as children in their original order.
--
-- The name of an attribute is the local name of its 'KeyEqual',
-- its value is the text of the children of that 'KeyEqual'
-- written with escaping disabled.
partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
partitionAttributesChildren ts = (attrs, cs)
  where
    (as, cs) = Seq.partition isAttr ts

    isAttr = \case
      TreeN (unCell -> KeyEqual{}) _cs -> True
      _ -> False

    -- Total conversion: 'as' only holds 'KeyEqual's,
    -- hence nothing is ever dropped by the second alternative.
    attrs = as >>= \case
      TreeN (Cell bp ep (KeyEqual n _wh)) a ->
        Seq.singleton $ Cell bp ep (xmlLocalName n, value a)
      _ -> mempty

    value a =
      TL.toStrict $
        Write.text Write.config_text{Write.config_text_escape = False} $
          TreeSeq.mapAlsoKey (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
452
-- | Names known as elements.
--
-- When 'inhXml_figure' is set, 'xmlTCT' wraps a 'KeyColon'
-- whose name is not in this set into a @figure@ element.
elems :: Set Text
elems =
  [ "about", "abstract", "address", "alias", "annotation", "area"
  , "artwork", "aside", "audio", "author", "authors"
  , "bcp14", "br"
  , "call", "city", "code", "comment", "comments", "country"
  , "date", "dd", "define", "del", "div", "dl", "document", "dt"
  , "editor", "email", "embed", "eref"
  , "fax", "feed", "feedback", "figure", "filter", "format", "from"
  , "h", "hi", "html5"
  , "i", "index", "iref"
  , "keyword"
  , "li", "link"
  , "name", "note"
  , "ol", "organization"
  , "para", "postamble", "preamble"
  , "q"
  , "ref", "reference", "region", "rl", "rref"
  , "sc", "section", "serie", "source", "span", "street", "style", "sub", "sup"
  , "table", "tbody", "td", "tel", "tfoot", "title", "th", "thead"
  , "toc", "tof", "tr", "tt"
  , "ul", "uri"
  , "version", "video"
  , "workgroup"
  , "xml"
  , "zipcode"
  ]