{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Hdoc.TCT.Write.XML where import Control.Applicative (Applicative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Functor ((<$>), (<$), ($>)) import GHC.Exts (fromList) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..), isJust, maybe, maybeToList) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>)) import Data.Set (Set) import Data.String (String, IsString(..)) import Data.TreeSeq.Strict (Tree(..), tree0) import Prelude (Num(..), undefined) import System.FilePath as FilePath import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Set as Set import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import qualified Hdoc.TCT.Write.Plain as Plain import qualified Language.Symantic.XML as XML -- import Hdoc.TCT.Debug import Hdoc.TCT as TCT hiding (Parser) import Hdoc.TCT.Utils import Hdoc.XML (XML, XMLs) import Text.Blaze.DTC (xmlns_dtc) import Text.Blaze.XML () -- | Main entry point -- -- NOTE: 'XmlNode' are still annotated with 'Sourced', -- but nothing is done to preserve any ordering amongst them, -- because 'Node's sometimes need to be reordered -- (eg. about/title may have a title from the section before, -- hence outside of about). writeXML :: Roots -> XMLs writeXML (tn@(Tree (Sourced src (NodeHeader (HeaderSection 1))) _ts) Seq.:<| rs) = element src "head" (xmlifySection def tn) <| xmlify def rs writeXML roots = xmlify def roots -- | Generate the content of
or . xmlifySection :: Inh -> Root -> XMLs xmlifySection inh tn@(Tree (Sourced src _nt) _ts) = about <> xmlify inh' body where inh' = inh { inh_para = List.repeat elementPara , inh_figure = True } (titles, content) = partitionSection tn (attrs, body) = partitionAttrs content about = case Seq.viewl titles of EmptyL -> mempty title@(unTree -> src_title) :< subtitles -> (xmlAttrs (attrs `defaultAttr` (src_title $> (fromString "id", getAttrId title))) <>) $ Seq.singleton $ element src "about" $ xmlify inh{inh_para=List.repeat elementTitle} title <> aliases where aliases = subtitles >>= \subtitle@(unTree -> Sourced src_subtitle _) -> return $ element src_subtitle "alias" $ xmlAttrs [src_title $> (fromString "id", getAttrId subtitle)] <> xmlify inh{inh_para=List.repeat elementTitle} subtitle -- * Type 'Inh' data Inh = Inh { inh_figure :: Bool , inh_para :: [Inh -> Root -> XML] } instance Default Inh where def = Inh { inh_figure = False , inh_para = List.repeat elementPara } -- ** 'inh_para' elementPara :: Inh -> Root -> XML elementPara inh (Tree (Sourced src _) ts) = element src "para" $ xmlify inh ts elementTitle :: Inh -> Root -> XML elementTitle inh (Tree (Sourced src _) ts) = element src "title" $ xmlify inh ts elementTitleWith :: Attrs -> Inh -> Root -> XML elementTitleWith attrs inh (Tree (Sourced src _) ts) = element src "title" $ xmlAttrs attrs <> xmlify inh ts elementName :: Inh -> Root -> XML elementName inh (Tree (Sourced src _) ts) = element src "name" $ xmlify inh ts attributeName :: Inh -> Root -> XML attributeName _inh (Tree (Sourced src _) ts) = Tree (Sourced src $ XML.NodeAttr $ XML.qName $ fromString "name") $ return $ tree0 $ Sourced src $ XML.NodeText $ XML.escapeText $ Plain.writePlain ts attributeId :: Inh -> Root -> XML attributeId _inh (Tree (Sourced src _) ts) = element src "id" $ return $ tree0 $ Sourced src $ XML.NodeText $ XML.escapeText $ Plain.writePlain ts -- * Class 'Xmlify' class Xmlify a where xmlify :: Inh -> a -> XMLs instance Xmlify Roots where xmlify inh roots = case Seq.viewl roots of EmptyL -> mempty selfR@(Tree cellSelf@(Sourced selfS nodeSelf) childrenR) :< fsR -> case nodeSelf of ---------------------- -- NOTE: HeaderColon becomes parent -- of any continuous following-sibling HeaderBar or HeaderGreat NodeHeader (HeaderColon n _wh) | (span, rest) <- spanlHeaderColon fsR , not $ null span -> xmlify inh (Tree cellSelf (childrenR<>span)) <> xmlify inh rest where spanlHeaderColon :: Roots -> (Roots, Roots) spanlHeaderColon = Seq.spanl $ \case Tree (unSourced -> NodeHeader (HeaderBar m _)) _ -> m == n Tree (unSourced -> NodeHeader (HeaderGreat m _)) _ -> m == n _ -> False ---------------------- -- NOTE: gather HeaderBrackets NodeHeader HeaderBrackets{} | (span,rest) <- spanlBrackets roots , not (null span) -> (<| xmlify inh rest) $ element selfS "references" $ span >>= xmlify inh where spanlBrackets :: Roots -> (Roots, Roots) spanlBrackets = Seq.spanl $ \case Tree (unSourced -> NodeHeader HeaderBrackets{}) _ -> True _ -> False ---------------------- -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case. NodeText x | Tree (cy@(unSourced -> NodeText y)) ys :< fsfsR <- Seq.viewl fsR -> xmlify inh $ Tree (NodeText <$> (x <$ cellSelf) <> (y <$ cy)) (childrenR <> ys) <| fsfsR ---------------------- -- NOTE: (Some Text) NodePair PairParen -- NOTE: (Some Text)[Some Ref] | Tree (Sourced bracketS (NodePair PairBracket)) bracketR Seq.:<| fsfsR <- fsR -> xmlifyPairBracket inh (Just $ Sourced selfS childrenR) Nothing (Sourced bracketS bracketR) fsfsR -- NOTE: (Some Text)@Some At@[Some Ref] | Tree (Sourced atS (NodePair (PairAt False))) atR Seq.:<| Tree (Sourced bracketS (NodePair PairBracket)) bracketR Seq.:<| fsfsR <- fsR -> xmlifyPairBracket inh (Just $ Sourced bracketS mempty) (Just $ Sourced atS $ Plain.writePlain atR) (Sourced bracketS bracketR) fsfsR -- NOTE: (Some Text)@SomeAt[Some Ref] | Tree (Sourced atS (NodeToken (TokenAt False textAt))) _ Seq.:<| Tree (Sourced bracketS (NodePair PairBracket)) bracketR Seq.:<| fsfsR <- fsR -> xmlifyPairBracket inh (Just $ Sourced bracketS mempty) (Just $ Sourced atS textAt) (Sourced bracketS bracketR) fsfsR ---------------------- -- NOTE: [Some Ref] NodePair PairBracket -> xmlifyPairBracket inh Nothing Nothing (Sourced selfS childrenR) fsR ---------------------- -- NOTE: @Some At@[Some Ref] NodePair (PairAt False) | Tree (Sourced bracketS (NodePair PairBracket)) bracketR Seq.:<| fsfsR <- fsR -> xmlifyPairBracket inh Nothing (Just $ Sourced selfS $ Plain.writePlain childrenR) (Sourced bracketS bracketR) fsfsR ---------------------- -- NOTE: @SomeAt[Some Ref] NodeToken (TokenAt False textAt) | Tree (Sourced bracketS (NodePair PairBracket)) bracketR Seq.:<| fsfsR <- fsR -> xmlifyPairBracket inh Nothing (Just $ Sourced selfS textAt) (Sourced bracketS bracketR) fsfsR ---------------------- -- NOTE: gather HeaderDash _ | (span, rest) <- spanlItems (==HeaderDash) roots , not $ null span -> (<| xmlify inh rest) $ element selfS "ul" $ span >>= xmlify inh{inh_para=List.repeat elementPara} ---------------------- -- NOTE: gather HeaderDot | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots , not $ null span -> (<| xmlify inh rest) $ element selfS "ol" $ span >>= xmlify inh{inh_para=List.repeat elementPara} where spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots) spanlItems liHeader = Seq.spanl $ \(unTree -> (unSourced -> nod)) -> case nod of NodeHeader (HeaderColon (Just (XML.NCName (TL.unpack -> "li"))) _wh) -> True NodeHeader hdr -> liHeader hdr NodePair (PairElem (XML.NCName (TL.unpack -> "li")) _as) -> True _ -> False ---------------------- NodePara | para:inh_para <- inh_para inh -> para inh selfR <| xmlify inh{inh_para} fsR ---------------------- -- NOTE: context-free Root _ -> xmlify inh selfR `XML.union` xmlify inh fsR xmlifyPairBracket :: Inh -> Maybe (Cell Roots) -> Maybe (Cell TL.Text) -> Cell Roots -> Roots -> XMLs xmlifyPairBracket inh parenRM atRM (Sourced bracketS bracketR) fsR = case Plain.writePlain bracketR of -- NOTE: [some://url] _ | [unTree -> Sourced linkS (NodeToken (TokenLink link))] <- toList bracketR -> (<| xmlify inh fsR) $ element elemS "eref" $ xmlAttrs ([Sourced linkS (fromString "to", link)] <> atAttrs) <> xmlify inh (maybe mempty unSourced parenRM) -- NOTE: @Some At@[Some Ref] textPage | TL.any (=='/') textPage || isJust atRM -> let page = TL.pack $ FilePath.normalise $ TL.unpack textPage in (<| xmlify inh fsR) $ element elemS "page-ref" $ xmlAttrs ([Sourced bracketS (fromString "to", page)] <> atAttrs) <> xmlify inh (maybe mempty unSourced parenRM) -- NOTE: [Some Ref] textRef -> (<| xmlify inh fsR) $ element elemS "ref" $ xmlAttrs [Sourced bracketS (fromString "to", textRef)] <> case parenRM of Nothing -> mempty Just (Sourced parenS parenR) | null parenR -> -- NOTE: preserve empty parens Seq.singleton $ tree0 (Sourced parenS $ XML.NodeText mempty) | otherwise -> xmlify inh parenR where -- | Setting a correct Location improve error messages in parsing. Sourced elemS () = sconcat $ fromList $ mconcat [ maybeToList $ (() <$) <$> parenRM , maybeToList $ (() <$) <$> atRM , [Sourced bracketS ()] ] atAttrs = case atRM of Just (Sourced atS textAt) | not $ TL.null textAt -> [Sourced atS (fromString "at", textAt)] _ -> [] instance FromPad () where fromPad _ = () {- unionLocation :: Location -> Location -> Location unionLocation (x:|xs) (y:|_ys) = (unionFileRange x y:|xs) unionFileRange :: FileRange -> FileRange -> FileRange unionFileRange (FileRange xf xb xe) (FileRange _yf yb ye) = FileRange xf xb ye -} instance Xmlify Root where xmlify inh tn@(Tree (Sourced src@(sn:|ssn) nod) ts) = case nod of ---------------------- NodePara -> case inh_para inh of [] -> xmlify inh ts para:_ -> Seq.singleton $ para inh tn ---------------------- NodeHeader hdr -> case hdr of -- HeaderSection{} -> Seq.singleton $ element src "section" $ xmlifySection inh tn -- HeaderColon localName _wh -> let (attrs, body) = partitionAttrs ts in case name of -- NOTE: disable 'inh_figure' "about" -> Seq.singleton $ element src "about" $ xmlAttrs attrs <> xmlify inh'{inh_figure=False} body -- NOTE: handle judgment _ | Just lName <- localName , lName`List.elem`elemsJudgment -> -- FIXME: not a special case so far. Seq.singleton $ element src name $ xmlAttrs attrs <> xmlify inh'' body where inh'' = inh' { inh_para = case name of "grades" -> List.repeat attributeId "judges" -> List.repeat attributeId _ -> List.repeat elementTitle } -- NOTE: in
mode, unreserved elements become
_ | Just lName <- localName , inh_figure inh && lName`List.notElem`elems || null name -> Seq.singleton $ element src "figure" $ -- xmlAttrs (setAttr (Sourced en en ("type", XML.unNCName lName)) attrs) <> xmlAttrs (attrs `defaultAttr` Sourced (sn{fileRange_end=fileRange_begin sn}:|ssn) (fromString "type", XML.unNCName lName)) <> case toList body of [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body -- NOTE: reserved elements _ -> Seq.singleton $ element src name $ xmlAttrs attrs <> xmlify inh' body where name = maybe mempty (TL.unpack . XML.unNCName) localName inh' = inh { inh_para = case name of "about" -> List.repeat elementTitle "reference" -> List.repeat elementTitle "serie" -> List.repeat attributeName "author" -> List.repeat attributeName "editor" -> List.repeat attributeName "org" -> List.repeat attributeName "note" -> List.repeat elementPara _ -> [] } -- HeaderBar localName wh -> case localName of Just lName | inh_figure inh && lName`List.notElem`elems -> xmlify inh $ Tree (Sourced src $ NodeHeader $ HeaderColon localName wh) ts _ -> Seq.singleton $ element src "artwork" $ xmlAttrs (Seq.singleton $ Sourced (sn{fileRange_end=fileRange_end sn}:|ssn) (fromString "type", maybe mempty XML.unNCName localName)) <> xmlify inh{inh_para=[]} ts -- HeaderGreat localName _wh -> let (attrs,body) = partitionAttrs ts in Seq.singleton $ element src "quote" $ xmlAttrs (attrs `defaultAttr` Sourced (sn{fileRange_end=fileRange_end sn}:|ssn) (fromString "type", maybe mempty XML.unNCName localName)) <> xmlify inh{inh_para=List.repeat elementPara} body -- HeaderEqual localName _wh -> Seq.singleton $ Tree (Sourced src $ XML.NodeAttr (XML.qName localName)) $ return $ tree0 $ Sourced src $ XML.NodeText $ XML.escapeText $ Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts -- HeaderDot n -> Seq.singleton $ element src "li" $ let fileRange_end = (fileRange_begin sn) { colNum= colNum (fileRange_begin sn) <> num (TL.length n) } in xmlAttrs (Seq.singleton $ Sourced (sn{fileRange_end}:|ssn) (fromString "name", n)) <> xmlify inh ts -- HeaderDash -> Seq.singleton $ element src "li" $ xmlify inh ts -- HeaderDashDash -> Seq.singleton $ Tree0 $ Sourced src $ XML.NodeComment $ Plain.writePlain ts -- HeaderBrackets ident -> let (attrs, body) = partitionAttrs ts in Seq.singleton $ element src "reference" $ xmlAttrs (setAttr (Sourced (sn{fileRange_end=fileRange_end sn}:|ssn) (fromString "id",ident)) attrs) |> element src "about" ( xmlify inh'{inh_para = List.repeat elementTitle} body ) where inh' = inh{inh_figure = False} -- HeaderDotSlash _file -> xmlify inh ts ---------------------- NodePair pair -> case pair of PairBracket | to <- Plain.writePlain ts , TL.all (\c -> c/='[' && c/=']' && Char.isPrint c && not (Char.isSpace c)) to -> Seq.singleton $ element src "ref" $ xmlAttrs [Sourced src (fromString "to",to)] PairStar -> Seq.singleton $ element src "b" $ xmlify inh ts PairDash -> Seq.singleton $ element src "del" $ xmlify inh ts PairUnderscore -> Seq.singleton $ element src "u" $ xmlify inh ts PairSlash -> Seq.singleton $ element src "i" $ xmlify inh ts PairBackquote -> Seq.singleton $ element src "code" $ xmlify inh ts PairFrenchquote -> Seq.singleton $ element src "q" $ case ts of (Seq.viewl -> Tree0 (Sourced sl (NodeToken (TokenText l))) :< ls) -> case Seq.viewr ls of m :> Tree0 (Sourced sr (NodeToken (TokenText r0))) -> xmlify inh $ Tree0 (Sourced sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l)))) Seq.<|(m Seq.|>Tree0 (Sourced sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r0))))) _ -> xmlify inh $ Tree0 (Sourced sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls (Seq.viewr -> rs :> Tree0 (Sourced sr (NodeToken (TokenText r0)))) -> xmlify inh $ rs Seq.|> Tree0 (Sourced sr (NodeToken (TokenText (TL.dropAround Char.isSpace r0)))) _ -> xmlify inh ts PairTag isBackref -> Seq.singleton $ element src (if isBackref then "tag-back" else "tag") $ xmlAttrs [Sourced src (fromString "to", Plain.writePlain ts)] -- xmlAttrs [Sourced src ("to",to)] -- xmlify inh{inh_para=[]} ts -- xmlAttrs [Sourced src ("to",Plain.writePlain ts)] PairAt isBackref -> Seq.singleton $ element src (if isBackref then "at-back" else "at") $ xmlAttrs [Sourced src (fromString "to", Plain.writePlain ts)] PairElem n attrs -> Seq.singleton $ Tree (Sourced src $ XML.NodeElem $ XML.QName xmlns_dtc n) $ xmlAttrs (Seq.fromList $ (\(_wh, ElemAttr{..}) -> Sourced src (elemAttr_name, elemAttr_value)) <$> attrs) <> xmlify inh ts _ -> Seq.singleton (Tree0 $ Sourced (sn{fileRange_end=bn'}:|ssn) $ XML.NodeText (XML.EscapedText $ pure $ XML.EscapedPlain open)) `XML.union` xmlify inh ts `XML.union` Seq.singleton (Tree0 $ Sourced (sn{fileRange_begin=en'}:|ssn) $ XML.NodeText $ XML.EscapedText $ pure $ XML.EscapedPlain close) where (open, close) = pairBorders pair ts bn' = (fileRange_begin sn){colNum=num $ colInt (fileRange_begin sn) + int (TL.length open)} en' = (fileRange_end sn){colNum=num $ colInt (fileRange_end sn) - int (TL.length close)} ---------------------- NodeText t -> Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText $ XML.escapeText t ---------------------- NodeToken tok -> case tok of TokenEscape c -> Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText $ XML.EscapedText $ pure $ XML.escapeChar c TokenText t -> Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText $ XML.escapeText t TokenAt b to -> Seq.singleton $ element src (if b then "at-back" else "at") $ xmlAttrs [Sourced src (fromString "to", to)] -- Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText to TokenTag b to -> Seq.singleton $ element src (if b then "tag-back" else "tag") $ xmlAttrs [Sourced src (fromString "to", to)] -- Seq.singleton $ Tree0 $ Sourced src $ XML.NodeText to TokenLink lnk -> Seq.singleton $ element src "eref" $ xmlAttrs [Sourced src (fromString "to", lnk)] ---------------------- NodeLower n as -> Seq.singleton $ element src "artwork" $ xmlify inh ts instance Xmlify a => Xmlify (Maybe a) where xmlify inh = \case Nothing -> mempty Just a -> xmlify inh a {- instance Xmlify (Seq (Cell (XML.QName,TL.Text))) where xmlify _inh = xmlAttrs -} -- * Elements element :: TCT.Location -> String -> XMLs -> XML element src n = Tree (Sourced src $ XML.NodeElem $ XML.QName xmlns_dtc $ fromString n) -- | Reserved elements' name elems :: Set ElemName -- TODO: use a @data@ or maybe a @data family@ instead of 'TL.Text' elems = Set.fromList $ fromString <$> [ "about" , "abstract" , "address" , "alias" , "annotation" , "area" , "artwork" , "aside" , "at" , "at-back" , "audio" , "author" , "authors" , "bcp14" , "br" , "break" , "call" , "city" , "code" , "comment" , "comments" , "country" , "date" , "dd" , "default" , "define" , "del" , "div" , "dl" , "document" , "dt" , "editor" , "email" , "embed" , "eref" , "fax" , "feed" , "feedback" , "figure" , "filter" , "format" , "from" , "h" , "head" , "hi" , "html5" , "i" , "index" , "iref" , "keyword" , "li" , "link" , "name" , "note" , "ol" , "organization" , "para" , "page-ref" , "postamble" , "preamble" , "q" , "ref" , "reference" , "references" , "refs" , "region" , "sc" , "section" , "serie" , "source" , "span" , "street" , "style" , "sub" , "sup" , "table" , "tag" , "tag-back" , "tbody" , "td" , "tel" , "tfoot" , "th" , "thead" , "title" , "toc" , "tof" , "tr" , "tt" , "u" , "ul" , "uri" , "version" , "video" , "workgroup" , "xml" , "zipcode" ] elemsJudgment :: Set ElemName elemsJudgment = Set.fromList $ fromString <$> [ "choice" , "grade" , "grades" , "judge" , "judges" , "judgment" , "opinion" ] -- * Attributes type Attrs = Seq (Cell (XML.NCName, TL.Text)) -- | Convenient alias, forcing the types xmlAttrs :: Attrs -> XMLs xmlAttrs = (<$>) $ \(Sourced src (n, v)) -> Tree (Sourced src $ XML.NodeAttr (XML.qName n)) $ Seq.singleton $ tree0 $ Sourced src $ XML.NodeText $ XML.escapeText v -- | Extract section titles partitionSection :: Root -> (Roots, Roots) partitionSection (Tree (unSourced -> NodeHeader (HeaderSection lvlPar)) body) = case Seq.viewl body of EmptyL -> mempty title@(unTree -> Sourced (FileRange{fileRange_end=et}:|_) NodePara) :< rest -> let (subtitles, content) = spanlSubtitles et rest in (title <| (subtitles >>= subTrees), content) where spanlSubtitles ep ts = case Seq.viewl ts of sub@(unTree -> Sourced (FileRange{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs | lvlSub <= lvlPar , lineInt fileRange_begin - lineInt ep <= 1 -> let (subs, ts') = spanlSubtitles fileRange_end rs in (sub <| subs, ts') _ -> (mempty, ts) _ -> (mempty, body) partitionSection _ = mempty -- | Extract attributes partitionAttrs :: Roots -> (Attrs, Roots) partitionAttrs ts = (attrs, cs) where (as,cs) = (`Seq.partition` ts) $ \case Tree (unSourced -> NodeHeader (HeaderEqual (XML.NCName n) _wh)) _cs -> not $ TL.null n _ -> False attrs = attr <$> as attr = \case Tree (Sourced loc (NodeHeader (HeaderEqual n _wh))) a -> Sourced loc (n, v) where v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a _ -> undefined getAttrId :: Root -> TL.Text getAttrId = Plain.writePlain . Seq.singleton setAttr :: Cell (XML.NCName, TL.Text) -> Attrs -> Attrs setAttr a@(unSourced -> (k, _v)) as = case Seq.findIndexL (\(unSourced -> (n,_v)) -> n == k) as of Just idx -> Seq.update idx a as Nothing -> a <| as defaultAttr :: Attrs -> Cell (XML.NCName, TL.Text) -> Attrs defaultAttr as a@(unSourced -> (k, _v)) = case Seq.findIndexL (\(unSourced -> (n,_v)) -> n == k) as of Just _idx -> as Nothing -> a <| as