]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Write/XML.hs
Update to megaparsec-7 and new symantic-xml
[doclang.git] / Hdoc / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE ViewPatterns #-}
5 {-# OPTIONS_GHC -fno-warn-orphans #-}
6 module Hdoc.TCT.Write.XML where
7
8 import Control.Applicative (Applicative(..))
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Default.Class (Default(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$), ($>))
16 import GHC.Exts (fromList)
17 import Data.List.NonEmpty (NonEmpty(..))
18 import Data.Maybe (Maybe(..), isJust, maybe, maybeToList)
19 import Data.Monoid (Monoid(..))
20 import Data.Ord (Ord(..))
21 import Data.Semigroup (Semigroup(..))
22 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
23 import Data.Set (Set)
24 import Data.String (String, IsString(..))
25 import Data.TreeSeq.Strict (Tree(..), tree0)
26 import Prelude (Num(..), undefined)
27 import System.FilePath as FilePath
28 import qualified Data.Char as Char
29 import qualified Data.List as List
30 import qualified Data.Set as Set
31 import qualified Data.Sequence as Seq
32 import qualified Data.Text.Lazy as TL
33 import qualified Hdoc.TCT.Write.Plain as Plain
34 import qualified Language.Symantic.XML as XML
35
36 -- import Hdoc.TCT.Debug
37 import Hdoc.TCT as TCT hiding (Parser)
38 import Hdoc.TCT.Utils
39 import Hdoc.XML (XML, XMLs)
40 import Text.Blaze.DTC (xmlns_dtc)
41 import Text.Blaze.XML ()
42
43 -- | Main entry point
44 --
45 -- NOTE: 'XmlNode' are still annotated with 'Sourced',
46 -- but nothing is done to preserve any ordering amongst them,
47 -- because 'Node's sometimes need to be reordered
48 -- (eg. about/title may have a title from the section before,
49 -- hence outside of about).
50 writeXML :: Roots -> XMLs
51 writeXML (tn@(Tree (Sourced src (NodeHeader (HeaderSection 1))) _ts) Seq.:<| rs) =
52 element src "head" (xmlifySection def tn) <|
53 xmlify def rs
54 writeXML roots = xmlify def roots
55
56 -- | Generate the content of <section> or <head>.
57 xmlifySection :: Inh -> Root -> XMLs
58 xmlifySection inh tn@(Tree (Sourced src _nt) _ts) =
59 about <>
60 xmlify inh' body
61 where
62 inh' = inh
63 { inh_para = List.repeat elementPara
64 , inh_figure = True
65 }
66 (titles, content) = partitionSection tn
67 (attrs, body) = partitionAttrs content
68 about =
69 case Seq.viewl titles of
70 EmptyL -> mempty
71 title@(unTree -> src_title) :< subtitles ->
72 (xmlAttrs (attrs `defaultAttr` (src_title $> (fromString "id", getAttrId title))) <>) $
73 Seq.singleton $
74 element src "about" $
75 xmlify inh{inh_para=List.repeat elementTitle} title <>
76 aliases
77 where
78 aliases =
79 subtitles >>= \subtitle@(unTree -> Sourced src_subtitle _) ->
80 return $
81 element src_subtitle "alias" $
82 xmlAttrs [src_title $> (fromString "id", getAttrId subtitle)] <>
83 xmlify inh{inh_para=List.repeat elementTitle} subtitle
84
85 -- * Type 'Inh'
86 data Inh
87 = Inh
88 { inh_figure :: Bool
89 , inh_para :: [Inh -> Root -> XML]
90 }
91 instance Default Inh where
92 def = Inh
93 { inh_figure = False
94 , inh_para = List.repeat elementPara
95 }
96
97 -- ** 'inh_para'
98 elementPara :: Inh -> Root -> XML
99 elementPara inh (Tree (Sourced src _) ts) = element src "para" $ xmlify inh ts
100
101 elementTitle :: Inh -> Root -> XML
102 elementTitle inh (Tree (Sourced src _) ts) =
103 element src "title" $
104 xmlify inh ts
105
106 elementTitleWith :: Attrs -> Inh -> Root -> XML
107 elementTitleWith attrs inh (Tree (Sourced src _) ts) =
108 element src "title" $
109 xmlAttrs attrs <> xmlify inh ts
110
111 elementName :: Inh -> Root -> XML
112 elementName inh (Tree (Sourced src _) ts) =
113 element src "name" $
114 xmlify inh ts
115
116 attributeName :: Inh -> Root -> XML
117 attributeName _inh (Tree (Sourced src _) ts) =
118 Tree (Sourced src $ XML.NodeAttr $ XML.qName $ fromString "name") $
119 return $ tree0 $ Sourced src $
120 XML.NodeText $ XML.escapeText $ Plain.writePlain ts
121
122 attributeId :: Inh -> Root -> XML
123 attributeId _inh (Tree (Sourced src _) ts) =
124 element src "id" $
125 return $ tree0 $ Sourced src $
126 XML.NodeText $ XML.escapeText $ Plain.writePlain ts
127
128 -- * Class 'Xmlify'
129 class Xmlify a where
130 xmlify :: Inh -> a -> XMLs
131 instance Xmlify Roots where
132 xmlify inh roots =
133 case Seq.viewl roots of
134 EmptyL -> mempty
135 selfR@(Tree cellSelf@(Sourced selfS nodeSelf) childrenR) :< fsR ->
136 case nodeSelf of
137 ----------------------
138 -- NOTE: HeaderColon becomes parent
139 -- of any continuous following-sibling HeaderBar or HeaderGreat
140 NodeHeader (HeaderColon n _wh)
141 | (span, rest) <- spanlHeaderColon fsR
142 , not $ null span ->
143 xmlify inh (Tree cellSelf (childrenR<>span)) <>
144 xmlify inh rest
145 where
146 spanlHeaderColon :: Roots -> (Roots, Roots)
147 spanlHeaderColon =
148 Seq.spanl $ \case
149 Tree (unSourced -> NodeHeader (HeaderBar m _)) _ -> m == n
150 Tree (unSourced -> NodeHeader (HeaderGreat m _)) _ -> m == n
151 _ -> False
152 ----------------------
153 -- NOTE: gather HeaderBrackets
154 NodeHeader HeaderBrackets{}
155 | (span,rest) <- spanlBrackets roots
156 , not (null span) ->
157 (<| xmlify inh rest) $
158 element selfS "references" $
159 span >>= xmlify inh
160 where
161 spanlBrackets :: Roots -> (Roots, Roots)
162 spanlBrackets =
163 Seq.spanl $ \case
164 Tree (unSourced -> NodeHeader HeaderBrackets{}) _ -> True
165 _ -> False
166 ----------------------
167 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
168 NodeText x
169 | Tree (cy@(unSourced -> NodeText y)) ys :< fsfsR <- Seq.viewl fsR ->
170 xmlify inh $ Tree (NodeText <$> (x <$ cellSelf) <> (y <$ cy)) (childrenR <> ys) <| fsfsR
171 ----------------------
172 -- NOTE: (Some Text)
173 NodePair PairParen
174 -- NOTE: (Some Text)[Some Ref]
175 | Tree (Sourced bracketS (NodePair PairBracket)) bracketR
176 Seq.:<| fsfsR <- fsR ->
177 xmlifyPairBracket inh (Just $ Sourced selfS childrenR)
178 Nothing
179 (Sourced bracketS bracketR) fsfsR
180 -- NOTE: (Some Text)@Some At@[Some Ref]
181 | Tree (Sourced atS (NodePair (PairAt False))) atR
182 Seq.:<| Tree (Sourced bracketS (NodePair PairBracket)) bracketR
183 Seq.:<| fsfsR <- fsR ->
184 xmlifyPairBracket inh (Just $ Sourced bracketS mempty)
185 (Just $ Sourced atS $ Plain.writePlain atR)
186 (Sourced bracketS bracketR) fsfsR
187 -- NOTE: (Some Text)@SomeAt[Some Ref]
188 | Tree (Sourced atS (NodeToken (TokenAt False textAt))) _
189 Seq.:<| Tree (Sourced bracketS (NodePair PairBracket)) bracketR
190 Seq.:<| fsfsR <- fsR ->
191 xmlifyPairBracket inh (Just $ Sourced bracketS mempty)
192 (Just $ Sourced atS textAt)
193 (Sourced bracketS bracketR) fsfsR
194 ----------------------
195 -- NOTE: [Some Ref]
196 NodePair PairBracket ->
197 xmlifyPairBracket inh Nothing
198 Nothing
199 (Sourced selfS childrenR) fsR
200 ----------------------
201 -- NOTE: @Some At@[Some Ref]
202 NodePair (PairAt False)
203 | Tree (Sourced bracketS (NodePair PairBracket)) bracketR
204 Seq.:<| fsfsR <- fsR ->
205 xmlifyPairBracket inh Nothing
206 (Just $ Sourced selfS $ Plain.writePlain childrenR)
207 (Sourced bracketS bracketR) fsfsR
208 ----------------------
209 -- NOTE: @SomeAt[Some Ref]
210 NodeToken (TokenAt False textAt)
211 | Tree (Sourced bracketS (NodePair PairBracket)) bracketR
212 Seq.:<| fsfsR <- fsR ->
213 xmlifyPairBracket inh Nothing
214 (Just $ Sourced selfS textAt)
215 (Sourced bracketS bracketR) fsfsR
216 ----------------------
217 -- NOTE: gather HeaderDash
218 _ | (span, rest) <- spanlItems (==HeaderDash) roots
219 , not $ null span ->
220 (<| xmlify inh rest) $
221 element selfS "ul" $
222 span >>= xmlify inh{inh_para=List.repeat elementPara}
223 ----------------------
224 -- NOTE: gather HeaderDot
225 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
226 , not $ null span ->
227 (<| xmlify inh rest) $
228 element selfS "ol" $
229 span >>= xmlify inh{inh_para=List.repeat elementPara}
230 where
231 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
232 spanlItems liHeader =
233 Seq.spanl $ \(unTree -> (unSourced -> nod)) ->
234 case nod of
235 NodeHeader (HeaderColon (Just (XML.NCName (TL.unpack -> "li"))) _wh) -> True
236 NodeHeader hdr -> liHeader hdr
237 NodePair (PairElem (XML.NCName (TL.unpack -> "li")) _as) -> True
238 _ -> False
239 ----------------------
240 NodePara | para:inh_para <- inh_para inh ->
241 para inh selfR <|
242 xmlify inh{inh_para} fsR
243 ----------------------
244 -- NOTE: context-free Root
245 _ ->
246 xmlify inh selfR `XML.union`
247 xmlify inh fsR
248 xmlifyPairBracket ::
249 Inh ->
250 Maybe (Cell Roots) ->
251 Maybe (Cell TL.Text) ->
252 Cell Roots ->
253 Roots -> XMLs
254 xmlifyPairBracket inh parenRM atRM
255 (Sourced bracketS bracketR) fsR =
256 case Plain.writePlain bracketR of
257 -- NOTE: [some://url]
258 _ | [unTree -> Sourced linkS (NodeToken (TokenLink link))] <- toList bracketR ->
259 (<| xmlify inh fsR) $
260 element elemS "eref" $
261 xmlAttrs ([Sourced linkS (fromString "to", link)] <> atAttrs) <>
262 xmlify inh (maybe mempty unSourced parenRM)
263 -- NOTE: @Some At@[Some Ref]
264 textPage | TL.any (=='/') textPage || isJust atRM ->
265 let page = TL.pack $ FilePath.normalise $ TL.unpack textPage in
266 (<| xmlify inh fsR) $
267 element elemS "page-ref" $
268 xmlAttrs ([Sourced bracketS (fromString "to", page)] <> atAttrs) <>
269 xmlify inh (maybe mempty unSourced parenRM)
270 -- NOTE: [Some Ref]
271 textRef ->
272 (<| xmlify inh fsR) $
273 element elemS "ref" $
274 xmlAttrs [Sourced bracketS (fromString "to", textRef)] <>
275 case parenRM of
276 Nothing -> mempty
277 Just (Sourced parenS parenR)
278 | null parenR -> -- NOTE: preserve empty parens
279 Seq.singleton $ tree0 (Sourced parenS $ XML.NodeText mempty)
280 | otherwise -> xmlify inh parenR
281 where
282 -- | Setting a correct Location improve error messages in parsing.
283 Sourced elemS () = sconcat $ fromList $ mconcat
284 [ maybeToList $ (() <$) <$> parenRM
285 , maybeToList $ (() <$) <$> atRM
286 , [Sourced bracketS ()]
287 ]
288 atAttrs = case atRM of
289 Just (Sourced atS textAt) | not $ TL.null textAt -> [Sourced atS (fromString "at", textAt)]
290 _ -> []
291 instance FromPad () where
292 fromPad _ = ()
293 {-
294 unionLocation :: Location -> Location -> Location
295 unionLocation (x:|xs) (y:|_ys) = (unionFileRange x y:|xs)
296 unionFileRange :: FileRange -> FileRange -> FileRange
297 unionFileRange (FileRange xf xb xe) (FileRange _yf yb ye) = FileRange xf xb ye
298 -}
299
300 instance Xmlify Root where
301 xmlify inh tn@(Tree (Sourced src@(sn:|ssn) nod) ts) =
302 case nod of
303 ----------------------
304 NodePara ->
305 case inh_para inh of
306 [] -> xmlify inh ts
307 para:_ -> Seq.singleton $ para inh tn
308 ----------------------
309 NodeHeader hdr ->
310 case hdr of
311 --
312 HeaderSection{} ->
313 Seq.singleton $
314 element src "section" $
315 xmlifySection inh tn
316 --
317 HeaderColon localName _wh ->
318 let (attrs, body) = partitionAttrs ts in
319 case name of
320 -- NOTE: disable 'inh_figure'
321 "about" ->
322 Seq.singleton $
323 element src "about" $
324 xmlAttrs attrs <>
325 xmlify inh'{inh_figure=False} body
326 -- NOTE: handle judgment
327 _ | Just lName <- localName
328 , lName`List.elem`elemsJudgment -> -- FIXME: not a special case so far.
329 Seq.singleton $
330 element src name $
331 xmlAttrs attrs <>
332 xmlify inh'' body
333 where
334 inh'' = inh'
335 { inh_para =
336 case name of
337 "grades" -> List.repeat attributeId
338 "judges" -> List.repeat attributeId
339 _ -> List.repeat elementTitle
340 }
341 -- NOTE: in <figure> mode, unreserved elements become <figure>
342 _ | Just lName <- localName
343 , inh_figure inh
344 && lName`List.notElem`elems || null name ->
345 Seq.singleton $
346 element src "figure" $
347 -- xmlAttrs (setAttr (Sourced en en ("type", XML.unNCName lName)) attrs) <>
348 xmlAttrs
349 (attrs `defaultAttr` Sourced (sn{fileRange_end=fileRange_begin sn}:|ssn)
350 (fromString "type", XML.unNCName lName)) <>
351 case toList body of
352 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
353 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
354 -- NOTE: reserved elements
355 _ ->
356 Seq.singleton $
357 element src name $
358 xmlAttrs attrs <>
359 xmlify inh' body
360 where
361 name = maybe mempty (TL.unpack . XML.unNCName) localName
362 inh' = inh
363 { inh_para =
364 case name of
365 "about" -> List.repeat elementTitle
366 "reference" -> List.repeat elementTitle
367 "serie" -> List.repeat attributeName
368 "author" -> List.repeat attributeName
369 "editor" -> List.repeat attributeName
370 "org" -> List.repeat attributeName
371 "note" -> List.repeat elementPara
372 _ -> []
373 }
374 --
375 HeaderBar localName wh ->
376 case localName of
377 Just lName
378 | inh_figure inh && lName`List.notElem`elems ->
379 xmlify inh $
380 Tree (Sourced src $ NodeHeader $ HeaderColon localName wh) ts
381 _ ->
382 Seq.singleton $
383 element src "artwork" $
384 xmlAttrs
385 (Seq.singleton $ Sourced (sn{fileRange_end=fileRange_end sn}:|ssn)
386 (fromString "type", maybe mempty XML.unNCName localName)) <>
387 xmlify inh{inh_para=[]} ts
388 --
389 HeaderGreat localName _wh ->
390 let (attrs,body) = partitionAttrs ts in
391 Seq.singleton $
392 element src "quote" $
393 xmlAttrs
394 (attrs `defaultAttr` Sourced (sn{fileRange_end=fileRange_end sn}:|ssn)
395 (fromString "type", maybe mempty XML.unNCName localName)) <>
396 xmlify inh{inh_para=List.repeat elementPara} body
397 --
398 HeaderEqual localName _wh ->
399 Seq.singleton $
400 Tree (Sourced src $ XML.NodeAttr (XML.qName localName)) $
401 return $ tree0 $ Sourced src $ XML.NodeText $
402 XML.escapeText $
403 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
404 --
405 HeaderDot n ->
406 Seq.singleton $
407 element src "li" $
408 let fileRange_end = (fileRange_begin sn)
409 { colNum= colNum (fileRange_begin sn) <> num (TL.length n) } in
410 xmlAttrs
411 (Seq.singleton $ Sourced (sn{fileRange_end}:|ssn)
412 (fromString "name", n)) <>
413 xmlify inh ts
414 --
415 HeaderDash -> Seq.singleton $ element src "li" $ xmlify inh ts
416 --
417 HeaderDashDash ->
418 Seq.singleton $ Tree0 $ Sourced src $
419 XML.NodeComment $ Plain.writePlain ts
420 --
421 HeaderBrackets ident ->
422 let (attrs, body) = partitionAttrs ts in
423 Seq.singleton $
424 element src "reference" $
425 xmlAttrs
426 (setAttr (Sourced (sn{fileRange_end=fileRange_end sn}:|ssn)
427 (fromString "id",ident)) attrs) |>
428 element src "about" (
429 xmlify inh'{inh_para = List.repeat elementTitle} body
430 )
431 where
432 inh' = inh{inh_figure = False}
433 --
434 HeaderDotSlash _file -> xmlify inh ts
435 ----------------------
436 NodePair pair ->
437 case pair of
438 PairBracket | to <- Plain.writePlain ts
439 , TL.all (\c -> c/='[' && c/=']'
440 && Char.isPrint c
441 && not (Char.isSpace c)) to ->
442 Seq.singleton $
443 element src "ref" $
444 xmlAttrs [Sourced src (fromString "to",to)]
445 PairStar -> Seq.singleton $ element src "b" $ xmlify inh ts
446 PairDash -> Seq.singleton $ element src "del" $ xmlify inh ts
447 PairUnderscore -> Seq.singleton $ element src "u" $ xmlify inh ts
448 PairSlash -> Seq.singleton $ element src "i" $ xmlify inh ts
449 PairBackquote -> Seq.singleton $ element src "code" $ xmlify inh ts
450 PairFrenchquote ->
451 Seq.singleton $
452 element src "q" $
453 case ts of
454 (Seq.viewl -> Tree0 (Sourced sl (NodeToken (TokenText l))) :< ls) ->
455 case Seq.viewr ls of
456 m :> Tree0 (Sourced sr (NodeToken (TokenText r0))) ->
457 xmlify inh $
458 Tree0 (Sourced sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
459 Seq.<|(m Seq.|>Tree0 (Sourced sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r0)))))
460 _ ->
461 xmlify inh $
462 Tree0 (Sourced sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
463 (Seq.viewr -> rs :> Tree0 (Sourced sr (NodeToken (TokenText r0)))) ->
464 xmlify inh $
465 rs Seq.|> Tree0 (Sourced sr (NodeToken (TokenText (TL.dropAround Char.isSpace r0))))
466 _ -> xmlify inh ts
467 PairTag isBackref ->
468 Seq.singleton $
469 element src (if isBackref then "tag-back" else "tag") $
470 xmlAttrs [Sourced src (fromString "to", Plain.writePlain ts)]
471 -- xmlAttrs [Sourced src ("to",to)]
472 -- xmlify inh{inh_para=[]} ts
473 -- xmlAttrs [Sourced src ("to",Plain.writePlain ts)]
474 PairAt isBackref ->
475 Seq.singleton $
476 element src (if isBackref then "at-back" else "at") $
477 xmlAttrs [Sourced src (fromString "to", Plain.writePlain ts)]
478 PairElem n attrs ->
479 Seq.singleton $
480 Tree (Sourced src $ XML.NodeElem $ XML.QName xmlns_dtc n) $
481 xmlAttrs (Seq.fromList $ (\(_wh, ElemAttr{..}) ->
482 Sourced src (elemAttr_name, elemAttr_value)) <$> attrs) <>
483 xmlify inh ts
484 _ ->
485 Seq.singleton (Tree0 $ Sourced (sn{fileRange_end=bn'}:|ssn) $
486 XML.NodeText (XML.EscapedText $ pure $ XML.EscapedPlain open)) `XML.union`
487 xmlify inh ts `XML.union`
488 Seq.singleton (Tree0 $ Sourced (sn{fileRange_begin=en'}:|ssn) $
489 XML.NodeText $ XML.EscapedText $ pure $ XML.EscapedPlain close)
490 where
491 (open, close) = pairBorders pair ts
492 bn' = (fileRange_begin sn){colNum=num $ colInt (fileRange_begin sn) + int (TL.length open)}
493 en' = (fileRange_end sn){colNum=num $ colInt (fileRange_end sn) - int (TL.length close)}
494 ----------------------
495 NodeText t -> Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText $ XML.escapeText t
496 ----------------------
497 NodeToken tok ->
498 case tok of
499 TokenEscape c -> Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText $ XML.EscapedText $ pure $ XML.escapeChar c
500 TokenText t -> Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText $ XML.escapeText t
501 TokenAt b to -> Seq.singleton $ element src (if b then "at-back" else "at") $
502 xmlAttrs [Sourced src (fromString "to", to)]
503 -- Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText to
504 TokenTag b to -> Seq.singleton $ element src (if b then "tag-back" else "tag") $
505 xmlAttrs [Sourced src (fromString "to", to)]
506 -- Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText to
507 TokenLink lnk -> Seq.singleton $ element src "eref" $
508 xmlAttrs [Sourced src (fromString "to", lnk)]
509 ----------------------
510 NodeLower n as ->
511 Seq.singleton $
512 element src "artwork" $
513 xmlify inh ts
514 instance Xmlify a => Xmlify (Maybe a) where
515 xmlify inh = \case
516 Nothing -> mempty
517 Just a -> xmlify inh a
518 {-
519 instance Xmlify (Seq (Cell (XML.QName,TL.Text))) where
520 xmlify _inh = xmlAttrs
521 -}
522
523 -- * Elements
524
525 element :: TCT.Location -> String -> XMLs -> XML
526 element src n = Tree (Sourced src $ XML.NodeElem $ XML.QName xmlns_dtc $ fromString n)
527
528 -- | Reserved elements' name
529 elems :: Set ElemName -- TODO: use a @data@ or maybe a @data family@ instead of 'TL.Text'
530 elems = Set.fromList $ fromString <$>
531 [ "about"
532 , "abstract"
533 , "address"
534 , "alias"
535 , "annotation"
536 , "area"
537 , "artwork"
538 , "aside"
539 , "at"
540 , "at-back"
541 , "audio"
542 , "author"
543 , "authors"
544 , "bcp14"
545 , "br"
546 , "break"
547 , "call"
548 , "city"
549 , "code"
550 , "comment"
551 , "comments"
552 , "country"
553 , "date"
554 , "dd"
555 , "default"
556 , "define"
557 , "del"
558 , "div"
559 , "dl"
560 , "document"
561 , "dt"
562 , "editor"
563 , "email"
564 , "embed"
565 , "eref"
566 , "fax"
567 , "feed"
568 , "feedback"
569 , "figure"
570 , "filter"
571 , "format"
572 , "from"
573 , "h"
574 , "head"
575 , "hi"
576 , "html5"
577 , "i"
578 , "index"
579 , "iref"
580 , "keyword"
581 , "li"
582 , "link"
583 , "name"
584 , "note"
585 , "ol"
586 , "organization"
587 , "para"
588 , "page-ref"
589 , "postamble"
590 , "preamble"
591 , "q"
592 , "ref"
593 , "reference"
594 , "references"
595 , "refs"
596 , "region"
597 , "sc"
598 , "section"
599 , "serie"
600 , "source"
601 , "span"
602 , "street"
603 , "style"
604 , "sub"
605 , "sup"
606 , "table"
607 , "tag"
608 , "tag-back"
609 , "tbody"
610 , "td"
611 , "tel"
612 , "tfoot"
613 , "th"
614 , "thead"
615 , "title"
616 , "toc"
617 , "tof"
618 , "tr"
619 , "tt"
620 , "u"
621 , "ul"
622 , "uri"
623 , "version"
624 , "video"
625 , "workgroup"
626 , "xml"
627 , "zipcode"
628 ]
629
630 elemsJudgment :: Set ElemName
631 elemsJudgment = Set.fromList $ fromString <$>
632 [ "choice"
633 , "grade"
634 , "grades"
635 , "judge"
636 , "judges"
637 , "judgment"
638 , "opinion"
639 ]
640
641 -- * Attributes
642 type Attrs = Seq (Cell (XML.NCName, TL.Text))
643
644 -- | Convenient alias, forcing the types
645 xmlAttrs :: Attrs -> XMLs
646 xmlAttrs =
647 (<$>) $ \(Sourced src (n, v)) ->
648 Tree (Sourced src $ XML.NodeAttr (XML.qName n)) $
649 Seq.singleton $ tree0 $
650 Sourced src $
651 XML.NodeText $ XML.escapeText v
652
653 -- | Extract section titles
654 partitionSection :: Root -> (Roots, Roots)
655 partitionSection (Tree (unSourced -> NodeHeader (HeaderSection lvlPar)) body) =
656 case Seq.viewl body of
657 EmptyL -> mempty
658 title@(unTree -> Sourced (FileRange{fileRange_end=et}:|_) NodePara) :< rest ->
659 let (subtitles, content) = spanlSubtitles et rest in
660 (title <| (subtitles >>= subTrees), content)
661 where
662 spanlSubtitles ep ts =
663 case Seq.viewl ts of
664 sub@(unTree -> Sourced (FileRange{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
665 | lvlSub <= lvlPar
666 , lineInt fileRange_begin - lineInt ep <= 1 ->
667 let (subs, ts') = spanlSubtitles fileRange_end rs in
668 (sub <| subs, ts')
669 _ -> (mempty, ts)
670 _ -> (mempty, body)
671 partitionSection _ = mempty
672
673 -- | Extract attributes
674 partitionAttrs :: Roots -> (Attrs, Roots)
675 partitionAttrs ts = (attrs, cs)
676 where
677 (as,cs) = (`Seq.partition` ts) $ \case
678 Tree (unSourced -> NodeHeader (HeaderEqual (XML.NCName n) _wh)) _cs -> not $ TL.null n
679 _ -> False
680 attrs = attr <$> as
681 attr = \case
682 Tree (Sourced loc (NodeHeader (HeaderEqual n _wh))) a ->
683 Sourced loc (n, v)
684 where v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
685 _ -> undefined
686
687 getAttrId :: Root -> TL.Text
688 getAttrId = Plain.writePlain . Seq.singleton
689
690 setAttr :: Cell (XML.NCName, TL.Text) -> Attrs -> Attrs
691 setAttr a@(unSourced -> (k, _v)) as =
692 case Seq.findIndexL (\(unSourced -> (n,_v)) -> n == k) as of
693 Just idx -> Seq.update idx a as
694 Nothing -> a <| as
695
696 defaultAttr :: Attrs -> Cell (XML.NCName, TL.Text) -> Attrs
697 defaultAttr as a@(unSourced -> (k, _v)) =
698 case Seq.findIndexL (\(unSourced -> (n,_v)) -> n == k) as of
699 Just _idx -> as
700 Nothing -> a <| as