]> Git — Sourcephile - doclang.git/blob - src/Textphile/TCT/Write/XML.hs
Fix megaparsec-8 update
[doclang.git] / src / Textphile / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE TypeFamilies #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Textphile.TCT.Write.XML where
8
9 import Control.Applicative (Applicative(..))
10 import Control.Monad (Monad(..))
11 import Data.Bool
12 import Data.Default.Class (Default(..))
13 import Data.Eq (Eq(..))
14 import Data.Foldable (Foldable(..))
15 import Data.Function (($), (.))
16 import Data.Functor ((<$>), (<$), ($>))
17 import GHC.Exts (fromList)
18 import Data.List.NonEmpty (NonEmpty(..))
19 import Data.Maybe (Maybe(..), isJust, maybe, maybeToList)
20 import Data.Monoid (Monoid(..))
21 import Data.Ord (Ord(..))
22 import Data.Semigroup (Semigroup(..))
23 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
24 import Data.Set (Set)
25 import Data.String (String, IsString(..))
26 import Data.TreeSeq.Strict (Tree(..), tree0)
27 import Prelude (Num(..), undefined)
28 import System.FilePath as FilePath
29 import qualified Data.Char as Char
30 import qualified Data.List as List
31 import qualified Data.Set as Set
32 import qualified Data.Sequence as Seq
33 import qualified Data.Text.Lazy as TL
34 import qualified Textphile.TCT.Write.Plain as Plain
35 import qualified Symantic.XML as XML
36
37 -- import Textphile.TCT.Debug
38 import Textphile.TCT as TCT hiding (Parser)
39 import Textphile.TCT.Utils
40 import Textphile.XML (XML, XMLs)
41 import Text.Blaze.DTC (xmlns_dtc)
42 import Text.Blaze.XML ()
43
44 -- | Main entry point
45 --
46 -- NOTE: 'XmlNode' are still annotated with 'Sourced',
47 -- but nothing is done to preserve any ordering amongst them,
48 -- because 'Node's sometimes need to be reordered
49 -- (eg. about/title may have a title from the section before,
50 -- hence outside of about).
51 writeXML :: Roots -> XMLs
52 writeXML (tn@(Tree (Sourced src (NodeHeader (HeaderSection 1))) _ts) Seq.:<| rs) =
53 element src "head" (xmlifySection def tn) <|
54 xmlify def rs
55 writeXML roots = xmlify def roots
56
57 -- | Generate the content of <section> or <head>.
58 xmlifySection :: Inh -> Root -> XMLs
59 xmlifySection inh tn@(Tree (Sourced src _nt) _ts) =
60 about <>
61 xmlify inh' body
62 where
63 inh' = inh
64 { inh_para = List.repeat elementPara
65 , inh_figure = True
66 }
67 (titles, content) = partitionSection tn
68 (attrs, body) = partitionAttrs content
69 about =
70 case Seq.viewl titles of
71 EmptyL -> mempty
72 title@(unTree -> src_title) :< subtitles ->
73 (xmlAttrs (attrs `defaultAttr` (src_title $> (fromString "id", getAttrId title))) <>) $
74 Seq.singleton $
75 element src "about" $
76 xmlify inh{inh_para=List.repeat elementTitle} title <>
77 aliases
78 where
79 aliases =
80 subtitles >>= \subtitle@(unTree -> Sourced src_subtitle _) ->
81 return $
82 element src_subtitle "alias" $
83 xmlAttrs [src_title $> (fromString "id", getAttrId subtitle)] <>
84 xmlify inh{inh_para=List.repeat elementTitle} subtitle
85
86 -- * Type 'Inh'
87 data Inh
88 = Inh
89 { inh_figure :: Bool
90 , inh_para :: [Inh -> Root -> XML]
91 }
92 instance Default Inh where
93 def = Inh
94 { inh_figure = False
95 , inh_para = List.repeat elementPara
96 }
97
98 -- ** 'inh_para'
99 elementPara :: Inh -> Root -> XML
100 elementPara inh (Tree (Sourced src _) ts) = element src "para" $ xmlify inh ts
101
102 elementTitle :: Inh -> Root -> XML
103 elementTitle inh (Tree (Sourced src _) ts) =
104 element src "title" $
105 xmlify inh ts
106
107 elementTitleWith :: Attrs -> Inh -> Root -> XML
108 elementTitleWith attrs inh (Tree (Sourced src _) ts) =
109 element src "title" $
110 xmlAttrs attrs <> xmlify inh ts
111
112 elementName :: Inh -> Root -> XML
113 elementName inh (Tree (Sourced src _) ts) =
114 element src "name" $
115 xmlify inh ts
116
117 attributeName :: Inh -> Root -> XML
118 attributeName _inh (Tree (Sourced src _) ts) =
119 Tree (Sourced src $ XML.NodeAttr $ XML.qName $ fromString "name") $
120 return $ tree0 $ Sourced src $
121 XML.NodeText $ XML.escapeText $ Plain.writePlain ts
122
123 attributeId :: Inh -> Root -> XML
124 attributeId _inh (Tree (Sourced src _) ts) =
125 element src "id" $
126 return $ tree0 $ Sourced src $
127 XML.NodeText $ XML.escapeText $ Plain.writePlain ts
128
129 -- * Class 'Xmlify'
130 class Xmlify a where
131 xmlify :: Inh -> a -> XMLs
132 instance Xmlify Roots where
133 xmlify inh roots =
134 case Seq.viewl roots of
135 EmptyL -> mempty
136 selfR@(Tree cellSelf@(Sourced selfS nodeSelf) childrenR) :< fsR ->
137 case nodeSelf of
138 ----------------------
139 -- NOTE: HeaderColon becomes parent
140 -- of any continuous following-sibling HeaderBar or HeaderGreat
141 NodeHeader (HeaderColon n _wh)
142 | (span, rest) <- spanlHeaderColon fsR
143 , not $ null span ->
144 xmlify inh (Tree cellSelf (childrenR<>span)) <>
145 xmlify inh rest
146 where
147 spanlHeaderColon :: Roots -> (Roots, Roots)
148 spanlHeaderColon =
149 Seq.spanl $ \case
150 Tree (unSourced -> NodeHeader (HeaderBar m _)) _ -> m == n
151 Tree (unSourced -> NodeHeader (HeaderGreat m _)) _ -> m == n
152 _ -> False
153 ----------------------
154 -- NOTE: gather HeaderBrackets
155 NodeHeader HeaderBrackets{}
156 | (span,rest) <- spanlBrackets roots
157 , not (null span) ->
158 (<| xmlify inh rest) $
159 element selfS "references" $
160 span >>= xmlify inh
161 where
162 spanlBrackets :: Roots -> (Roots, Roots)
163 spanlBrackets =
164 Seq.spanl $ \case
165 Tree (unSourced -> NodeHeader HeaderBrackets{}) _ -> True
166 _ -> False
167 ----------------------
168 -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case.
169 NodeText x
170 | Tree (cy@(unSourced -> NodeText y)) ys :< fsfsR <- Seq.viewl fsR ->
171 xmlify inh $ Tree (NodeText <$> (x <$ cellSelf) <> (y <$ cy)) (childrenR <> ys) <| fsfsR
172 ----------------------
173 -- NOTE: (Some Text)
174 NodePair PairParen
175 -- NOTE: (Some Text)[Some Ref]
176 | Tree (Sourced bracketS (NodePair PairBracket)) bracketR
177 Seq.:<| fsfsR <- fsR ->
178 xmlifyPairBracket inh (Just $ Sourced selfS childrenR)
179 Nothing
180 (Sourced bracketS bracketR) fsfsR
181 -- NOTE: (Some Text)@Some At@[Some Ref]
182 | Tree (Sourced atS (NodePair (PairAt False))) atR
183 Seq.:<| Tree (Sourced bracketS (NodePair PairBracket)) bracketR
184 Seq.:<| fsfsR <- fsR ->
185 xmlifyPairBracket inh (Just $ Sourced bracketS mempty)
186 (Just $ Sourced atS $ Plain.writePlain atR)
187 (Sourced bracketS bracketR) fsfsR
188 -- NOTE: (Some Text)@SomeAt[Some Ref]
189 | Tree (Sourced atS (NodeToken (TokenAt False textAt))) _
190 Seq.:<| Tree (Sourced bracketS (NodePair PairBracket)) bracketR
191 Seq.:<| fsfsR <- fsR ->
192 xmlifyPairBracket inh (Just $ Sourced bracketS mempty)
193 (Just $ Sourced atS textAt)
194 (Sourced bracketS bracketR) fsfsR
195 ----------------------
196 -- NOTE: [Some Ref]
197 NodePair PairBracket ->
198 xmlifyPairBracket inh Nothing
199 Nothing
200 (Sourced selfS childrenR) fsR
201 ----------------------
202 -- NOTE: @Some At@[Some Ref]
203 NodePair (PairAt False)
204 | Tree (Sourced bracketS (NodePair PairBracket)) bracketR
205 Seq.:<| fsfsR <- fsR ->
206 xmlifyPairBracket inh Nothing
207 (Just $ Sourced selfS $ Plain.writePlain childrenR)
208 (Sourced bracketS bracketR) fsfsR
209 ----------------------
210 -- NOTE: @SomeAt[Some Ref]
211 NodeToken (TokenAt False textAt)
212 | Tree (Sourced bracketS (NodePair PairBracket)) bracketR
213 Seq.:<| fsfsR <- fsR ->
214 xmlifyPairBracket inh Nothing
215 (Just $ Sourced selfS textAt)
216 (Sourced bracketS bracketR) fsfsR
217 ----------------------
218 -- NOTE: gather HeaderDash
219 _ | (span, rest) <- spanlItems (==HeaderDash) roots
220 , not $ null span ->
221 (<| xmlify inh rest) $
222 element selfS "ul" $
223 span >>= xmlify inh{inh_para=List.repeat elementPara}
224 ----------------------
225 -- NOTE: gather HeaderDot
226 | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots
227 , not $ null span ->
228 (<| xmlify inh rest) $
229 element selfS "ol" $
230 span >>= xmlify inh{inh_para=List.repeat elementPara}
231 where
232 spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots)
233 spanlItems liHeader =
234 Seq.spanl $ \(unTree -> (unSourced -> nod)) ->
235 case nod of
236 NodeHeader (HeaderColon (Just (XML.NCName (TL.unpack -> "li"))) _wh) -> True
237 NodeHeader hdr -> liHeader hdr
238 NodePair (PairElem (XML.NCName (TL.unpack -> "li")) _as) -> True
239 _ -> False
240 ----------------------
241 NodePara | para:inh_para <- inh_para inh ->
242 para inh selfR <|
243 xmlify inh{inh_para} fsR
244 ----------------------
245 -- NOTE: context-free Root
246 _ ->
247 xmlify inh selfR `XML.union`
248 xmlify inh fsR
249 xmlifyPairBracket ::
250 Inh ->
251 Maybe (Cell Roots) ->
252 Maybe (Cell TL.Text) ->
253 Cell Roots ->
254 Roots -> XMLs
255 xmlifyPairBracket inh parenRM atRM
256 (Sourced bracketS bracketR) fsR =
257 case Plain.writePlain bracketR of
258 -- NOTE: [some://url]
259 _ | [unTree -> Sourced linkS (NodeToken (TokenLink link))] <- toList bracketR ->
260 (<| xmlify inh fsR) $
261 element elemS "eref" $
262 xmlAttrs ([Sourced linkS (fromString "to", link)] <> atAttrs) <>
263 xmlify inh (maybe mempty unSourced parenRM)
264 -- NOTE: @Some At@[Some Ref]
265 textPage | TL.any (=='/') textPage || isJust atRM ->
266 let page = TL.pack $ FilePath.normalise $ TL.unpack textPage in
267 (<| xmlify inh fsR) $
268 element elemS "page-ref" $
269 xmlAttrs ([Sourced bracketS (fromString "to", page)] <> atAttrs) <>
270 xmlify inh (maybe mempty unSourced parenRM)
271 -- NOTE: [Some Ref]
272 textRef ->
273 (<| xmlify inh fsR) $
274 element elemS "ref" $
275 xmlAttrs [Sourced bracketS (fromString "to", textRef)] <>
276 case parenRM of
277 Nothing -> mempty
278 Just (Sourced parenS parenR)
279 | null parenR -> -- NOTE: preserve empty parens
280 Seq.singleton $ tree0 (Sourced parenS $ XML.NodeText mempty)
281 | otherwise -> xmlify inh parenR
282 where
283 -- | Setting a correct Location improve error messages in parsing.
284 Sourced elemS () = sconcat $ fromList $ mconcat
285 [ maybeToList $ (() <$) <$> parenRM
286 , maybeToList $ (() <$) <$> atRM
287 , [Sourced bracketS ()]
288 ]
289 atAttrs = case atRM of
290 Just (Sourced atS textAt) | not $ TL.null textAt -> [Sourced atS (fromString "at", textAt)]
291 _ -> []
292 instance FromPad () where
293 fromPad _ = ()
294 {-
295 unionLocation :: Location -> Location -> Location
296 unionLocation (x:|xs) (y:|_ys) = (unionFileRange x y:|xs)
297 unionFileRange :: FileRange -> FileRange -> FileRange
298 unionFileRange (FileRange xf xb xe) (FileRange _yf yb ye) = FileRange xf xb ye
299 -}
300
301 instance Xmlify Root where
302 xmlify inh tn@(Tree (Sourced src@(sn:|ssn) nod) ts) =
303 case nod of
304 ----------------------
305 NodePara ->
306 case inh_para inh of
307 [] -> xmlify inh ts
308 para:_ -> Seq.singleton $ para inh tn
309 ----------------------
310 NodeHeader hdr ->
311 case hdr of
312 --
313 HeaderSection{} ->
314 Seq.singleton $
315 element src "section" $
316 xmlifySection inh tn
317 --
318 HeaderColon localName _wh ->
319 let (attrs, body) = partitionAttrs ts in
320 case name of
321 -- NOTE: disable 'inh_figure'
322 "about" ->
323 Seq.singleton $
324 element src "about" $
325 xmlAttrs attrs <>
326 xmlify inh'{inh_figure=False} body
327 -- NOTE: handle judgment
328 _ | Just lName <- localName
329 , lName`List.elem`elemsJudgment -> -- FIXME: not a special case so far.
330 Seq.singleton $
331 element src name $
332 xmlAttrs attrs <>
333 xmlify inh'' body
334 where
335 inh'' = inh'
336 { inh_para =
337 case name of
338 "grades" -> List.repeat attributeId
339 "judges" -> List.repeat attributeId
340 _ -> List.repeat elementTitle
341 }
342 -- NOTE: in <figure> mode, unreserved elements become <figure>
343 _ | Just lName <- localName
344 , inh_figure inh
345 && lName`List.notElem`elems || null name ->
346 Seq.singleton $
347 element src "figure" $
348 -- xmlAttrs (setAttr (Sourced en en ("type", XML.unNCName lName)) attrs) <>
349 xmlAttrs
350 (attrs `defaultAttr` Sourced (sn{fileRange_end=fileRange_begin sn}:|ssn)
351 (fromString "type", XML.unNCName lName)) <>
352 case toList body of
353 [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body
354 _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body
355 -- NOTE: reserved elements
356 _ ->
357 Seq.singleton $
358 element src name $
359 xmlAttrs attrs <>
360 xmlify inh' body
361 where
362 name = maybe mempty (TL.unpack . XML.unNCName) localName
363 inh' = inh
364 { inh_para =
365 case name of
366 "about" -> List.repeat elementTitle
367 "reference" -> List.repeat elementTitle
368 "serie" -> List.repeat attributeName
369 "author" -> List.repeat attributeName
370 "editor" -> List.repeat attributeName
371 "org" -> List.repeat attributeName
372 "note" -> List.repeat elementPara
373 _ -> []
374 }
375 --
376 HeaderBar localName wh ->
377 case localName of
378 Just lName
379 | inh_figure inh && lName`List.notElem`elems ->
380 xmlify inh $
381 Tree (Sourced src $ NodeHeader $ HeaderColon localName wh) ts
382 _ ->
383 Seq.singleton $
384 element src "artwork" $
385 xmlAttrs
386 (Seq.singleton $ Sourced (sn{fileRange_end=fileRange_end sn}:|ssn)
387 (fromString "type", maybe mempty XML.unNCName localName)) <>
388 xmlify inh{inh_para=[]} ts
389 --
390 HeaderGreat localName _wh ->
391 let (attrs,body) = partitionAttrs ts in
392 Seq.singleton $
393 element src "quote" $
394 xmlAttrs
395 (attrs `defaultAttr` Sourced (sn{fileRange_end=fileRange_end sn}:|ssn)
396 (fromString "type", maybe mempty XML.unNCName localName)) <>
397 xmlify inh{inh_para=List.repeat elementPara} body
398 --
399 HeaderEqual localName _wh ->
400 Seq.singleton $
401 Tree (Sourced src $ XML.NodeAttr (XML.qName localName)) $
402 return $ tree0 $ Sourced src $ XML.NodeText $
403 XML.escapeText $
404 Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts
405 --
406 HeaderDot n ->
407 Seq.singleton $
408 element src "li" $
409 let fileRange_end = (fileRange_begin sn)
410 { colNum= colNum (fileRange_begin sn) <> num (TL.length n) } in
411 xmlAttrs
412 (Seq.singleton $ Sourced (sn{fileRange_end}:|ssn)
413 (fromString "name", n)) <>
414 xmlify inh ts
415 --
416 HeaderDash -> Seq.singleton $ element src "li" $ xmlify inh ts
417 --
418 HeaderDashDash ->
419 Seq.singleton $ Tree0 $ Sourced src $
420 XML.NodeComment $ Plain.writePlain ts
421 --
422 HeaderBrackets ident ->
423 let (attrs, body) = partitionAttrs ts in
424 Seq.singleton $
425 element src "reference" $
426 xmlAttrs
427 (setAttr (Sourced (sn{fileRange_end=fileRange_end sn}:|ssn)
428 (fromString "id",ident)) attrs) |>
429 element src "about" (
430 xmlify inh'{inh_para = List.repeat elementTitle} body
431 )
432 where
433 inh' = inh{inh_figure = False}
434 --
435 HeaderDotSlash _file -> xmlify inh ts
436 ----------------------
437 NodePair pair ->
438 case pair of
439 PairBracket | to <- Plain.writePlain ts
440 , TL.all (\c -> c/='[' && c/=']'
441 && Char.isPrint c
442 && not (Char.isSpace c)) to ->
443 Seq.singleton $
444 element src "ref" $
445 xmlAttrs [Sourced src (fromString "to",to)]
446 PairStar -> Seq.singleton $ element src "b" $ xmlify inh ts
447 PairDash -> Seq.singleton $ element src "del" $ xmlify inh ts
448 PairUnderscore -> Seq.singleton $ element src "u" $ xmlify inh ts
449 PairSlash -> Seq.singleton $ element src "i" $ xmlify inh ts
450 PairBackquote -> Seq.singleton $ element src "code" $ xmlify inh ts
451 PairFrenchquote ->
452 Seq.singleton $
453 element src "q" $
454 case ts of
455 (Seq.viewl -> Tree0 (Sourced sl (NodeToken (TokenText l))) :< ls) ->
456 case Seq.viewr ls of
457 m :> Tree0 (Sourced sr (NodeToken (TokenText r0))) ->
458 xmlify inh $
459 Tree0 (Sourced sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l))))
460 Seq.<|(m Seq.|>Tree0 (Sourced sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r0)))))
461 _ ->
462 xmlify inh $
463 Tree0 (Sourced sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls
464 (Seq.viewr -> rs :> Tree0 (Sourced sr (NodeToken (TokenText r0)))) ->
465 xmlify inh $
466 rs Seq.|> Tree0 (Sourced sr (NodeToken (TokenText (TL.dropAround Char.isSpace r0))))
467 _ -> xmlify inh ts
468 PairTag isBackref ->
469 Seq.singleton $
470 element src (if isBackref then "tag-back" else "tag") $
471 xmlAttrs [Sourced src (fromString "to", Plain.writePlain ts)]
472 -- xmlAttrs [Sourced src ("to",to)]
473 -- xmlify inh{inh_para=[]} ts
474 -- xmlAttrs [Sourced src ("to",Plain.writePlain ts)]
475 PairAt isBackref ->
476 Seq.singleton $
477 element src (if isBackref then "at-back" else "at") $
478 xmlAttrs [Sourced src (fromString "to", Plain.writePlain ts)]
479 PairElem n attrs ->
480 Seq.singleton $
481 Tree (Sourced src $ XML.NodeElem $ XML.QName xmlns_dtc n) $
482 xmlAttrs (Seq.fromList $ (\(_wh, ElemAttr{..}) ->
483 Sourced src (elemAttr_name, elemAttr_value)) <$> attrs) <>
484 xmlify inh ts
485 _ ->
486 Seq.singleton (Tree0 $ Sourced (sn{fileRange_end=bn'}:|ssn) $
487 XML.NodeText (XML.EscapedText $ pure $ XML.EscapedPlain open)) `XML.union`
488 xmlify inh ts `XML.union`
489 Seq.singleton (Tree0 $ Sourced (sn{fileRange_begin=en'}:|ssn) $
490 XML.NodeText $ XML.EscapedText $ pure $ XML.EscapedPlain close)
491 where
492 (open, close) = pairBorders pair ts
493 bn' = (fileRange_begin sn){colNum=num $ colInt (fileRange_begin sn) + int (TL.length open)}
494 en' = (fileRange_end sn){colNum=num $ colInt (fileRange_end sn) - int (TL.length close)}
495 ----------------------
496 NodeText t -> Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText $ XML.escapeText t
497 ----------------------
498 NodeToken tok ->
499 case tok of
500 TokenEscape c -> Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText $ XML.EscapedText $ pure $ XML.escapeChar c
501 TokenText t -> Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText $ XML.escapeText t
502 TokenAt b to -> Seq.singleton $ element src (if b then "at-back" else "at") $
503 xmlAttrs [Sourced src (fromString "to", to)]
504 -- Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText to
505 TokenTag b to -> Seq.singleton $ element src (if b then "tag-back" else "tag") $
506 xmlAttrs [Sourced src (fromString "to", to)]
507 -- Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText to
508 TokenLink lnk -> Seq.singleton $ element src "eref" $
509 xmlAttrs [Sourced src (fromString "to", lnk)]
510 ----------------------
511 NodeLower n as ->
512 Seq.singleton $
513 element src "artwork" $
514 xmlify inh ts
515 instance Xmlify a => Xmlify (Maybe a) where
516 xmlify inh = \case
517 Nothing -> mempty
518 Just a -> xmlify inh a
519 {-
520 instance Xmlify (Seq (Cell (XML.QName,TL.Text))) where
521 xmlify _inh = xmlAttrs
522 -}
523
524 -- * Elements
525
526 element :: TCT.Location -> String -> XMLs -> XML
527 element src n = Tree (Sourced src $ XML.NodeElem $ XML.QName xmlns_dtc $ fromString n)
528
529 -- | Reserved elements' name
530 elems :: Set ElemName -- TODO: use a @data@ or maybe a @data family@ instead of 'TL.Text'
531 elems = Set.fromList $ fromString <$>
532 [ "about"
533 , "abstract"
534 , "address"
535 , "alias"
536 , "annotation"
537 , "area"
538 , "artwork"
539 , "aside"
540 , "at"
541 , "at-back"
542 , "audio"
543 , "author"
544 , "authors"
545 , "bcp14"
546 , "br"
547 , "break"
548 , "call"
549 , "city"
550 , "code"
551 , "comment"
552 , "comments"
553 , "country"
554 , "date"
555 , "dd"
556 , "default"
557 , "define"
558 , "del"
559 , "div"
560 , "dl"
561 , "document"
562 , "dt"
563 , "editor"
564 , "email"
565 , "embed"
566 , "eref"
567 , "fax"
568 , "feed"
569 , "feedback"
570 , "figure"
571 , "filter"
572 , "format"
573 , "from"
574 , "h"
575 , "head"
576 , "hi"
577 , "html5"
578 , "i"
579 , "index"
580 , "iref"
581 , "keyword"
582 , "li"
583 , "link"
584 , "name"
585 , "note"
586 , "ol"
587 , "organization"
588 , "para"
589 , "page-ref"
590 , "postamble"
591 , "preamble"
592 , "q"
593 , "ref"
594 , "reference"
595 , "references"
596 , "refs"
597 , "region"
598 , "sc"
599 , "section"
600 , "serie"
601 , "source"
602 , "span"
603 , "street"
604 , "style"
605 , "sub"
606 , "sup"
607 , "table"
608 , "tag"
609 , "tag-back"
610 , "tbody"
611 , "td"
612 , "tel"
613 , "tfoot"
614 , "th"
615 , "thead"
616 , "title"
617 , "toc"
618 , "tof"
619 , "tr"
620 , "tt"
621 , "u"
622 , "ul"
623 , "uri"
624 , "version"
625 , "video"
626 , "workgroup"
627 , "xml"
628 , "zipcode"
629 ]
630
631 elemsJudgment :: Set ElemName
632 elemsJudgment = Set.fromList $ fromString <$>
633 [ "choice"
634 , "grade"
635 , "grades"
636 , "judge"
637 , "judges"
638 , "judgment"
639 , "opinion"
640 ]
641
642 -- * Attributes
643 type Attrs = Seq (Cell (XML.NCName, TL.Text))
644
645 -- | Convenient alias, forcing the types
646 xmlAttrs :: Attrs -> XMLs
647 xmlAttrs =
648 (<$>) $ \(Sourced src (n, v)) ->
649 Tree (Sourced src $ XML.NodeAttr (XML.qName n)) $
650 Seq.singleton $ tree0 $
651 Sourced src $
652 XML.NodeText $ XML.escapeText v
653
654 -- | Extract section titles
655 partitionSection :: Root -> (Roots, Roots)
656 partitionSection (Tree (unSourced -> NodeHeader (HeaderSection lvlPar)) body) =
657 case Seq.viewl body of
658 EmptyL -> mempty
659 title@(unTree -> Sourced (FileRange{fileRange_end=et}:|_) NodePara) :< rest ->
660 let (subtitles, content) = spanlSubtitles et rest in
661 (title <| (subtitles >>= subTrees), content)
662 where
663 spanlSubtitles ep ts =
664 case Seq.viewl ts of
665 sub@(unTree -> Sourced (FileRange{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs
666 | lvlSub <= lvlPar
667 , lineInt fileRange_begin - lineInt ep <= 1 ->
668 let (subs, ts') = spanlSubtitles fileRange_end rs in
669 (sub <| subs, ts')
670 _ -> (mempty, ts)
671 _ -> (mempty, body)
672 partitionSection _ = mempty
673
674 -- | Extract attributes
675 partitionAttrs :: Roots -> (Attrs, Roots)
676 partitionAttrs ts = (attrs, cs)
677 where
678 (as,cs) = (`Seq.partition` ts) $ \case
679 Tree (unSourced -> NodeHeader (HeaderEqual (XML.NCName n) _wh)) _cs -> not $ TL.null n
680 _ -> False
681 attrs = attr <$> as
682 attr = \case
683 Tree (Sourced loc (NodeHeader (HeaderEqual n _wh))) a ->
684 Sourced loc (n, v)
685 where v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a
686 _ -> undefined
687
688 getAttrId :: Root -> TL.Text
689 getAttrId = Plain.writePlain . Seq.singleton
690
691 setAttr :: Cell (XML.NCName, TL.Text) -> Attrs -> Attrs
692 setAttr a@(unSourced -> (k, _v)) as =
693 case Seq.findIndexL (\(unSourced -> (n,_v)) -> n == k) as of
694 Just idx -> Seq.update idx a as
695 Nothing -> a <| as
696
697 defaultAttr :: Attrs -> Cell (XML.NCName, TL.Text) -> Attrs
698 defaultAttr as a@(unSourced -> (k, _v)) =
699 case Seq.findIndexL (\(unSourced -> (n,_v)) -> n == k) as of
700 Just _idx -> as
701 Nothing -> a <| as