{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE StrictData #-} module Symantic.XML.Text where import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq) import Data.String (IsString(..), String) import Text.Show (Show(..)) import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as TLB import Symantic.XML.Namespace -- * Type 'Escaped' -- | 'EscapedText' lexemes. data Escaped = EscapedPlain TL.Text | EscapedEntityRef EntityRef | EscapedCharRef CharRef deriving (Eq, Ord, Show) -- ** Type 'EntityRef' data EntityRef = EntityRef { entityRef_name :: NCName , entityRef_value :: TL.Text } deriving (Eq, Ord, Show) entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef entityRef_lt = EntityRef (NCName "lt") "<" entityRef_gt = EntityRef (NCName "gt") ">" entityRef_amp = EntityRef (NCName "amp") "&" entityRef_quot = EntityRef (NCName "quot") "\"" entityRef_apos = EntityRef (NCName "apos") "'" -- ** Type 'CharRef' newtype CharRef = CharRef Char deriving (Eq, Ord, Show) -- * Type 'EscapedText' newtype EscapedText = EscapedText (Seq Escaped) deriving (Eq, Ord, Show) instance IsString EscapedText where fromString = escapeText . fromString unEscapedText :: EscapedText -> Seq Escaped unEscapedText (EscapedText et) = et {-# INLINE unEscapedText #-} escapeText :: TL.Text -> EscapedText escapeText s = EscapedText $ -- Add '>' to escape "]]>" without adding a 'TL.replace'. case TL.span (`List.notElem` ("<&>"::String)) s of (t, r) | TL.null t -> escape r | otherwise -> EscapedPlain t Seq.<| escape r where escape t = case TL.uncons t of Nothing -> mempty Just (c, cs) -> escapeTextChar c Seq.<| et where EscapedText et = escapeText cs escapeTextChar :: Char -> Escaped escapeTextChar = \case '<' -> EscapedEntityRef entityRef_lt '&' -> EscapedEntityRef entityRef_amp -- Add '>' to escape "]]>". '>' -> EscapedEntityRef entityRef_gt c -> EscapedPlain $ TL.singleton c unescapeText :: EscapedText -> TL.Text unescapeText (EscapedText et) = (`foldMap` et) $ \case EscapedPlain t -> t EscapedEntityRef EntityRef{..} -> entityRef_value EscapedCharRef (CharRef c) -> TL.singleton c -- * Type 'EscapedAttr' newtype EscapedAttr = EscapedAttr (Seq Escaped) deriving (Eq, Ord, Show) instance IsString EscapedAttr where fromString = escapeAttr . fromString unEscapedAttr :: EscapedAttr -> Seq Escaped unEscapedAttr (EscapedAttr et) = et {-# INLINE unEscapedAttr #-} escapeAttr :: TL.Text -> EscapedAttr escapeAttr s = EscapedAttr $ case TL.span (`List.notElem` ("<&\""::String)) s of (t, r) | TL.null t -> escape r | otherwise -> EscapedPlain t Seq.<| escape r where escape t = case TL.uncons t of Nothing -> mempty Just (c, cs) -> escapeAttrChar c Seq.<| et where EscapedAttr et = escapeAttr cs escapeAttrChar :: Char -> Escaped escapeAttrChar = \case '<' -> EscapedEntityRef entityRef_lt '&' -> EscapedEntityRef entityRef_amp -- Remove '\'' because 'textifyAttr' uses '"' for quoting. -- '\'' -> EscapedEntityRef entityRef_apos '"' -> EscapedEntityRef entityRef_quot c -> EscapedPlain $ TL.singleton c unescapeAttr :: EscapedAttr -> TL.Text unescapeAttr (EscapedAttr et) = unescapeText (EscapedText et) -- * Class 'Textify' class Textify a where textify :: a -> TLB.Builder instance Textify Char.Char where textify = TLB.singleton instance Textify String where textify = TLB.fromString instance Textify TL.Text where textify = TLB.fromLazyText instance Textify NCName where textify = textify . unNCName instance Textify PName where textify PName{..} = case pNameSpace of Nothing -> textify pNameLocal Just p -> textify p<>":"<> textify pNameLocal instance Textify Namespace where textify = textify . unNamespace instance Textify EntityRef where textify EntityRef{..} = "&"<>textify entityRef_name<>";" instance Textify CharRef where textify (CharRef c) = "&#"<>textify (show (Char.ord c))<>";" instance Textify EscapedText where textify (EscapedText et) = (`foldMap` et) $ \case EscapedPlain t -> textify t EscapedEntityRef r -> textify r EscapedCharRef r -> textify r instance Textify EscapedAttr where textify (EscapedAttr et) = "\""<>txt<>"\"" where txt = (`foldMap` et) $ \case EscapedPlain t -> textify t EscapedEntityRef r -> textify r EscapedCharRef r -> textify r textifyAttr :: PName -> EscapedAttr -> TLB.Builder textifyAttr n v = " "<>textify n<>"="<>textify v