]> Git — Sourcephile - haskell/symantic-xml.git/blob - src/Symantic/XML/Text.hs
Rewrite to categorical symantic
[haskell/symantic-xml.git] / src / Symantic / XML / Text.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE StrictData #-}
3 module Symantic.XML.Text where
4
5 import Data.Bool
6 import Data.Char (Char)
7 import Data.Eq (Eq(..))
8 import Data.Foldable (Foldable(..))
9 import Data.Function (($), (.))
10 import Data.Maybe (Maybe(..))
11 import Data.Monoid (Monoid(..))
12 import Data.Ord (Ord(..))
13 import Data.Semigroup (Semigroup(..))
14 import Data.Sequence (Seq)
15 import Data.String (IsString(..), String)
16 import Text.Show (Show(..))
17 import qualified Data.Char as Char
18 import qualified Data.List as List
19 import qualified Data.Sequence as Seq
20 import qualified Data.Text.Lazy as TL
21 import qualified Data.Text.Lazy.Builder as TLB
22
23 import Symantic.XML.Namespace
24
25 -- * Type 'Escaped'
26 -- | 'EscapedText' lexemes.
27 data Escaped
28 = EscapedPlain TL.Text
29 | EscapedEntityRef EntityRef
30 | EscapedCharRef CharRef
31 deriving (Eq, Ord, Show)
32
33 -- ** Type 'EntityRef'
34 data EntityRef
35 = EntityRef
36 { entityRef_name :: NCName
37 , entityRef_value :: TL.Text
38 } deriving (Eq, Ord, Show)
39
40 entityRef_lt,
41 entityRef_gt,
42 entityRef_amp,
43 entityRef_quot,
44 entityRef_apos :: EntityRef
45 entityRef_lt = EntityRef (NCName "lt") "<"
46 entityRef_gt = EntityRef (NCName "gt") ">"
47 entityRef_amp = EntityRef (NCName "amp") "&"
48 entityRef_quot = EntityRef (NCName "quot") "\""
49 entityRef_apos = EntityRef (NCName "apos") "'"
50
51 -- ** Type 'CharRef'
52 newtype CharRef = CharRef Char
53 deriving (Eq, Ord, Show)
54
55 -- * Type 'EscapedText'
56 newtype EscapedText = EscapedText (Seq Escaped)
57 deriving (Eq, Ord, Show)
58
59 instance IsString EscapedText where
60 fromString = escapeText . fromString
61
62 unEscapedText :: EscapedText -> Seq Escaped
63 unEscapedText (EscapedText et) = et
64 {-# INLINE unEscapedText #-}
65
66 escapeText :: TL.Text -> EscapedText
67 escapeText s =
68 EscapedText $
69 -- Add '>' to escape "]]>" without adding a 'TL.replace'.
70 case TL.span (`List.notElem` ("<&>"::String)) s of
71 (t, r) | TL.null t -> escape r
72 | otherwise -> EscapedPlain t Seq.<| escape r
73 where
74 escape t = case TL.uncons t of
75 Nothing -> mempty
76 Just (c, cs) -> escapeTextChar c Seq.<| et
77 where EscapedText et = escapeText cs
78
79 escapeTextChar :: Char -> Escaped
80 escapeTextChar = \case
81 '<' -> EscapedEntityRef entityRef_lt
82 '&' -> EscapedEntityRef entityRef_amp
83 -- Add '>' to escape "]]>".
84 '>' -> EscapedEntityRef entityRef_gt
85 c -> EscapedPlain $ TL.singleton c
86
87 unescapeText :: EscapedText -> TL.Text
88 unescapeText (EscapedText et) = (`foldMap` et) $ \case
89 EscapedPlain t -> t
90 EscapedEntityRef EntityRef{..} -> entityRef_value
91 EscapedCharRef (CharRef c) -> TL.singleton c
92
93 -- * Type 'EscapedAttr'
94 newtype EscapedAttr = EscapedAttr (Seq Escaped)
95 deriving (Eq, Ord, Show)
96
97 instance IsString EscapedAttr where
98 fromString = escapeAttr . fromString
99
100 unEscapedAttr :: EscapedAttr -> Seq Escaped
101 unEscapedAttr (EscapedAttr et) = et
102 {-# INLINE unEscapedAttr #-}
103
104 escapeAttr :: TL.Text -> EscapedAttr
105 escapeAttr s =
106 EscapedAttr $
107 case TL.span (`List.notElem` ("<&\""::String)) s of
108 (t, r) | TL.null t -> escape r
109 | otherwise -> EscapedPlain t Seq.<| escape r
110 where
111 escape t = case TL.uncons t of
112 Nothing -> mempty
113 Just (c, cs) -> escapeAttrChar c Seq.<| et
114 where EscapedAttr et = escapeAttr cs
115
116 escapeAttrChar :: Char -> Escaped
117 escapeAttrChar = \case
118 '<' -> EscapedEntityRef entityRef_lt
119 '&' -> EscapedEntityRef entityRef_amp
120 -- Remove '\'' because 'textifyAttr' uses '"' for quoting.
121 -- '\'' -> EscapedEntityRef entityRef_apos
122 '"' -> EscapedEntityRef entityRef_quot
123 c -> EscapedPlain $ TL.singleton c
124
125 unescapeAttr :: EscapedAttr -> TL.Text
126 unescapeAttr (EscapedAttr et) = unescapeText (EscapedText et)
127
128 -- * Class 'Textify'
129 class Textify a where
130 textify :: a -> TLB.Builder
131 instance Textify Char.Char where
132 textify = TLB.singleton
133 instance Textify String where
134 textify = TLB.fromString
135 instance Textify TL.Text where
136 textify = TLB.fromLazyText
137 instance Textify NCName where
138 textify = textify . unNCName
139 instance Textify PName where
140 textify PName{..} =
141 case pNameSpace of
142 Nothing -> textify pNameLocal
143 Just p -> textify p<>":"<> textify pNameLocal
144 instance Textify Namespace where
145 textify = textify . unNamespace
146 instance Textify EntityRef where
147 textify EntityRef{..} = "&"<>textify entityRef_name<>";"
148 instance Textify CharRef where
149 textify (CharRef c) = "&#"<>textify (show (Char.ord c))<>";"
150 instance Textify EscapedText where
151 textify (EscapedText et) = (`foldMap` et) $ \case
152 EscapedPlain t -> textify t
153 EscapedEntityRef r -> textify r
154 EscapedCharRef r -> textify r
155 instance Textify EscapedAttr where
156 textify (EscapedAttr et) = "\""<>txt<>"\""
157 where
158 txt = (`foldMap` et) $ \case
159 EscapedPlain t -> textify t
160 EscapedEntityRef r -> textify r
161 EscapedCharRef r -> textify r
162
163 textifyAttr :: PName -> EscapedAttr -> TLB.Builder
164 textifyAttr n v = " "<>textify n<>"="<>textify v