{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE StrictData #-} module Symantic.XML.Tree.Write where import Data.Bool import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), all) import Data.Function (($), (.), const) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.String (IsString(..)) import Data.Traversable (Traversable(..)) import Data.Tuple (fst) import System.IO (IO, FilePath) import qualified Control.Monad.Trans.State as S import qualified Data.ByteString.Lazy as BSL import qualified Data.Char as Char import qualified Data.HashMap.Strict as HM import qualified Data.HashSet as HS import qualified Data.List as List import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as TLB import qualified Data.Text.Lazy.Encoding as TL import qualified Data.TreeSeq.Strict as TS import Symantic.XML.Language import Symantic.XML.Tree.Source import Symantic.XML.Tree.Data import Symantic.XML.Write writeTree :: UnSource src => Trees src -> TL.Text writeTree ts = TLB.toLazyText $ unTreeWrite (write_Trees ts) defaultWriteInh writeTreeIndented :: UnSource src => TL.Text -> Trees src -> TL.Text writeTreeIndented ind xs = TLB.toLazyText $ unTreeWrite (write_Trees xs) defaultWriteInh { writeInh_indent_delta = ind } writeFile :: FilePath -> TL.Text -> IO () writeFile fp = BSL.writeFile fp . TL.encodeUtf8 -- * Type 'TreeWrite' newtype TreeWrite = TreeWrite { unTreeWrite :: WriteInh -> TLB.Builder } instance Semigroup TreeWrite where TreeWrite x <> TreeWrite y = TreeWrite (x <> y) instance Monoid TreeWrite where mempty = TreeWrite (const "") mappend = (<>) instance IsString TreeWrite where fromString = TreeWrite . const . fromString write_Trees :: UnSource src => Trees src -> TreeWrite write_Trees = foldMap write_Tree write_Tree :: UnSource src => Tree src -> TreeWrite write_Tree (TS.Tree node elemChilds) = TreeWrite $ \inh -> case unSource node of NodeText et@(EscapedText t) -- Remove spaces when indenting | not $ TL.null (writeInh_indent_delta inh) , all (\case EscapedPlain p -> TL.all Char.isSpace p _ -> False ) t -> mempty | otherwise -> textify et NodePI pn pv -> writeInh_indent inh <> "textify pn<> (case pn of -- Special case: the value of the "xml" PI is parsed -- as children NodePI "xml" -> foldMap (\case TS.Tree nod _ -> case unSource nod of NodePI n v -> " "<>textify n<>"=\""<>textify v<>"\"" _ -> mempty ) elemChilds _ -> s<>textify pv ) <> "?>" <> nl inh where s | TL.null pv = "" | otherwise = " " NodeCDATA t -> writeInh_indent inh <> "<[CDATA[["<>textify (TL.replace "]]>" "]]>" t)<>"]]>"<>nl inh NodeComment t -> writeInh_indent inh <> "" "-->" t)<>"-->"<>nl inh NodeElem elemQName elemAttrs -> writeInh_indent inh <> "<" <> write_elemPName <> write_xmlnsAttrs <> write_elemAttrs <> if noChild then "/>" <> nl inh else ">" <> (if hasIndenting then nl inh else mempty) <> write_elemChilds <> (if hasIndenting then writeInh_indent inh else mempty) <> "write_elemPName<>">" <> nl inh where -- Empty NodeText do not count as a child noChild = all (\case TS.Tree n _ts | NodeText (EscapedText t) <- unSource n -> all (\case EscapedPlain p -> TL.null p _ -> False ) t | otherwise -> False ) elemChilds -- Follow xmllint --format rules to detect indenting: -- if there is any NodeText it should only contain whites hasIndenting = (`all` elemChilds) $ \case TS.Tree n _ts | NodeText (EscapedText t) <- unSource n -> all (\case EscapedPlain p -> TL.all Char.isSpace p _ -> False ) t | otherwise -> True (usedNS, declNS) = HM.foldlWithKey' go (initUsedNS, initDeclNS) elemAttrs where initUsedNS = HS.singleton $ qNameSpace elemQName initDeclNS = (writeInh_namespaces inh){namespaces_prefixes=mempty} go acc@(uNS, dNS) an sav = case unSource sav of av -- xmlns:prefix="namespace" | qNameSpace an == xmlns_xmlns -> let ns = unescapeAttr av in (uNS, dNS { namespaces_prefixes = (if TL.null ns then HM.delete -- Empty namespace means removal -- of the prefix from scope. else (`HM.insert` qNameLocal an)) (Namespace ns) (namespaces_prefixes dNS) }) -- xmlns="namespace" | qNameSpace an == xmlns_empty , qNameLocal an == NCName "xmlns" -> (uNS, dNS{namespaces_default = Namespace (unescapeAttr av)}) -- name="value" | qNameSpace an == xmlns_empty -> acc -- {namespace}name="value" | otherwise -> (HS.insert (qNameSpace an) uNS, dNS) -- The inherited namespaces, -- including those declared at this element. inhNS = HM.union (namespaces_prefixes declNS) (namespaces_prefixes (writeInh_namespaces inh)) -- The namespaces used but not declared nor default, -- with fresh prefixes. autoNS = HM.delete (namespaces_default declNS) $ (`S.evalState` HS.empty) $ traverse (\() -> S.gets freshNCName) (HS.toMap usedNS `HM.difference` inhNS) write_xmlnsAttrs = foldMap (\(Namespace ns, qNameLocal) -> textifyAttr (PName (Just "xmlns") qNameLocal) (escapeAttr ns)) $ List.sortOn fst $ HM.toList autoNS scopeNS = declNS{ namespaces_prefixes = autoNS <> inhNS } write_elemPName = textify $ prefixifyQName scopeNS elemQName write_elemAttrs = foldMap (\(an, av) -> textifyAttr (prefixifyQName scopeNS{namespaces_default=xmlns_empty} an) (unSource av)) $ List.sortOn fst $ -- This makes the rendition more predictible, but this is useless. HM.toList elemAttrs write_elemChilds = unTreeWrite (write_Trees elemChilds) inh { writeInh_namespaces = scopeNS -- Disable indenting unless hasIndenting. , writeInh_indent = if hasIndenting then writeInh_indent inh <> textify (writeInh_indent_delta inh) else mempty , writeInh_indent_delta = if hasIndenting then writeInh_indent_delta inh else mempty }