1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE StrictData #-}
3 module Symantic.XML.Text where
6 import Data.Char (Char)
7 import Data.Eq (Eq(..))
8 import Data.Foldable (Foldable(..))
9 import Data.Function (($), (.))
10 import Data.Maybe (Maybe(..))
11 import Data.Monoid (Monoid(..))
12 import Data.Ord (Ord(..))
13 import Data.Semigroup (Semigroup(..))
14 import Data.Sequence (Seq)
15 import Data.String (IsString(..), String)
16 import Text.Show (Show(..))
17 import qualified Data.Char as Char
18 import qualified Data.List as List
19 import qualified Data.Sequence as Seq
20 import qualified Data.Text.Lazy as TL
21 import qualified Data.Text.Lazy.Builder as TLB
23 import Symantic.XML.Namespace
26 -- | 'EscapedText' lexemes.
28 = EscapedPlain TL.Text
29 | EscapedEntityRef EntityRef
30 | EscapedCharRef CharRef
31 deriving (Eq, Ord, Show)
33 -- ** Type 'EntityRef'
36 { entityRef_name :: NCName
37 , entityRef_value :: TL.Text
38 } deriving (Eq, Ord, Show)
44 entityRef_apos :: EntityRef
45 entityRef_lt = EntityRef (NCName "lt") "<"
46 entityRef_gt = EntityRef (NCName "gt") ">"
47 entityRef_amp = EntityRef (NCName "amp") "&"
48 entityRef_quot = EntityRef (NCName "quot") "\""
49 entityRef_apos = EntityRef (NCName "apos") "'"
52 newtype CharRef = CharRef Char
53 deriving (Eq, Ord, Show)
55 -- * Type 'EscapedText'
56 newtype EscapedText = EscapedText (Seq Escaped)
57 deriving (Eq, Ord, Show)
59 instance IsString EscapedText where
60 fromString = escapeText . fromString
62 unEscapedText :: EscapedText -> Seq Escaped
63 unEscapedText (EscapedText et) = et
64 {-# INLINE unEscapedText #-}
66 escapeText :: TL.Text -> EscapedText
69 -- Add '>' to escape "]]>" without adding a 'TL.replace'.
70 case TL.span (`List.notElem` ("<&>"::String)) s of
71 (t, r) | TL.null t -> escape r
72 | otherwise -> EscapedPlain t Seq.<| escape r
74 escape t = case TL.uncons t of
76 Just (c, cs) -> escapeTextChar c Seq.<| et
77 where EscapedText et = escapeText cs
79 escapeTextChar :: Char -> Escaped
80 escapeTextChar = \case
81 '<' -> EscapedEntityRef entityRef_lt
82 '&' -> EscapedEntityRef entityRef_amp
83 -- Add '>' to escape "]]>".
84 '>' -> EscapedEntityRef entityRef_gt
85 c -> EscapedPlain $ TL.singleton c
87 unescapeText :: EscapedText -> TL.Text
88 unescapeText (EscapedText et) = (`foldMap` et) $ \case
90 EscapedEntityRef EntityRef{..} -> entityRef_value
91 EscapedCharRef (CharRef c) -> TL.singleton c
93 -- * Type 'EscapedAttr'
94 newtype EscapedAttr = EscapedAttr (Seq Escaped)
95 deriving (Eq, Ord, Show)
97 instance IsString EscapedAttr where
98 fromString = escapeAttr . fromString
100 unEscapedAttr :: EscapedAttr -> Seq Escaped
101 unEscapedAttr (EscapedAttr et) = et
102 {-# INLINE unEscapedAttr #-}
104 escapeAttr :: TL.Text -> EscapedAttr
107 case TL.span (`List.notElem` ("<&\""::String)) s of
108 (t, r) | TL.null t -> escape r
109 | otherwise -> EscapedPlain t Seq.<| escape r
111 escape t = case TL.uncons t of
113 Just (c, cs) -> escapeAttrChar c Seq.<| et
114 where EscapedAttr et = escapeAttr cs
116 escapeAttrChar :: Char -> Escaped
117 escapeAttrChar = \case
118 '<' -> EscapedEntityRef entityRef_lt
119 '&' -> EscapedEntityRef entityRef_amp
120 -- Remove '\'' because 'textifyAttr' uses '"' for quoting.
121 -- '\'' -> EscapedEntityRef entityRef_apos
122 '"' -> EscapedEntityRef entityRef_quot
123 c -> EscapedPlain $ TL.singleton c
125 unescapeAttr :: EscapedAttr -> TL.Text
126 unescapeAttr (EscapedAttr et) = unescapeText (EscapedText et)
129 class Textify a where
130 textify :: a -> TLB.Builder
131 instance Textify Char.Char where
132 textify = TLB.singleton
133 instance Textify String where
134 textify = TLB.fromString
135 instance Textify TL.Text where
136 textify = TLB.fromLazyText
137 instance Textify NCName where
138 textify = textify . unNCName
139 instance Textify PName where
142 Nothing -> textify pNameLocal
143 Just p -> textify p<>":"<> textify pNameLocal
144 instance Textify Namespace where
145 textify = textify . unNamespace
146 instance Textify EntityRef where
147 textify EntityRef{..} = "&"<>textify entityRef_name<>";"
148 instance Textify CharRef where
149 textify (CharRef c) = "&#"<>textify (show (Char.ord c))<>";"
150 instance Textify EscapedText where
151 textify (EscapedText et) = (`foldMap` et) $ \case
152 EscapedPlain t -> textify t
153 EscapedEntityRef r -> textify r
154 EscapedCharRef r -> textify r
155 instance Textify EscapedAttr where
156 textify (EscapedAttr et) = "\""<>txt<>"\""
158 txt = (`foldMap` et) $ \case
159 EscapedPlain t -> textify t
160 EscapedEntityRef r -> textify r
161 EscapedCharRef r -> textify r
163 textifyAttr :: PName -> EscapedAttr -> TLB.Builder
164 textifyAttr n v = " "<>textify n<>"="<>textify v