]> Git — Sourcephile - haskell/symantic-xml.git/blob - Language/Symantic/XML/Write.hs
Add more XML test files.
[haskell/symantic-xml.git] / Language / Symantic / XML / Write.hs
1 {-# LANGUAGE BangPatterns #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE StrictData #-}
4 module Language.Symantic.XML.Write where
5
6 import Control.Applicative (Applicative(..), liftA2)
7 import Control.Monad (Monad(..))
8 import Data.Bool
9 import Data.Default.Class (Default(..))
10 import Data.Eq (Eq(..))
11 import Data.Foldable (Foldable(..), all)
12 import Data.Function (($), (.), const)
13 import Data.Maybe (Maybe(..))
14 import Data.Monoid (Monoid(..))
15 import Data.Semigroup (Semigroup(..))
16 import Data.String (String, IsString(..))
17 import Data.Traversable (Traversable(..))
18 import System.IO (IO, FilePath)
19 import Text.Show (Show(..))
20 import qualified Control.Monad.Trans.Reader as R
21 import qualified Control.Monad.Trans.State as S
22 import qualified Data.ByteString.Lazy as BSL
23 import qualified Data.Char as Char
24 import qualified Data.HashMap.Strict as HM
25 import qualified Data.HashSet as HS
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text.Lazy as TL
28 import qualified Data.Text.Lazy.Builder as TLB
29 import qualified Data.Text.Lazy.Encoding as TL
30
31 import Language.Symantic.XML.Document as XML
32
33 writeXML :: XMLs -> TL.Text
34 writeXML xs = TLB.toLazyText $ write xs `R.runReader` def
35
36 writeXMLIndented :: TL.Text -> XMLs -> TL.Text
37 writeXMLIndented ind xs =
38 TLB.toLazyText $
39 write xs `R.runReader` def
40 { reader_indent = if TL.null ind then mempty else "\n"
41 , reader_indent_delta = ind
42 }
43
44 writeFile :: FilePath -> TL.Text -> IO ()
45 writeFile fp t = BSL.writeFile fp $ TL.encodeUtf8 t
46
47 -- * Type 'Write'
48 type Write = R.Reader Reader TLB.Builder
49 instance Semigroup Write where
50 (<>) = liftA2 (<>)
51 instance Monoid Write where
52 mempty = return ""
53 mappend = (<>)
54 instance IsString Write where
55 fromString = return . fromString
56
57 -- ** Type 'Reader'
58 data Reader = Reader
59 { reader_ns_scope :: Namespaces NCName
60 , reader_indent :: TLB.Builder
61 , reader_indent_delta :: TL.Text
62 , reader_no_text :: Bool
63 }
64 instance Default Reader where
65 def = Reader
66 { reader_ns_scope = def
67 , reader_indent = ""
68 , reader_indent_delta = ""
69 , reader_no_text = False
70 }
71
72 -- * Class 'Buildable'
73 class Buildable a where
74 build :: a -> TLB.Builder
75 instance Buildable Char.Char where
76 build = TLB.singleton
77 instance Buildable String where
78 build = TLB.fromString
79 instance Buildable TL.Text where
80 build = TLB.fromLazyText
81 instance Buildable NCName where
82 build = build . unNCName
83 instance Buildable Name where
84 build = build . unName
85 instance Buildable PName where
86 build PName{..} =
87 case pNameSpace of
88 Nothing -> build pNameLocal
89 Just p -> build p<>":"<> build pNameLocal
90 instance Buildable Namespace where
91 build = build . unNamespace
92 instance Buildable EntityRef where
93 build EntityRef{..} = "&"<>build entityRef_name<>";"
94 instance Buildable CharRef where
95 build (CharRef c) = "&#"<>build (show (Char.ord c))<>";"
96 instance Buildable Text where
97 build = foldMap $ \case
98 TextLexemePlain t -> build t
99 TextLexemeEntityRef r -> build r
100 TextLexemeCharRef r -> build r
101
102 -- * Class 'Writable'
103 class Writeable a where
104 write :: a -> Write
105 instance Writeable NCName where
106 write = return . TLB.fromLazyText . unNCName
107 instance Writeable XMLs where
108 write xs = do
109 ro <- R.ask
110 if TL.null (reader_indent_delta ro)
111 then foldMap write xs
112 else
113 R.local (const ro{reader_no_text}) $
114 foldMap write xs
115 where reader_no_text =
116 (`all` xs) $ \case
117 Tree (Sourced _ (NodeText txt)) _ts ->
118 all (\case
119 TextLexemePlain t -> TL.all Char.isSpace t
120 _ -> False) txt
121 _ -> True
122 instance Writeable XML where
123 write (Tree (Sourced _src nod) xs) = do
124 ro <- R.ask
125 case nod of
126 NodeAttr an
127 | [Tree (Sourced _ (NodeText av)) _] <- toList xs -> do
128 return $ " "<>buildAttr (prefixifyQName (reader_ns_scope ro) an) av
129 | otherwise -> mempty
130 NodeCDATA t ->
131 return $
132 reader_indent ro <>
133 "<[CDATA[["<>build t<>"]]>"
134 NodeComment t ->
135 return $
136 reader_indent ro <>
137 "<!--"<>build t<>"-->"
138 NodeElem elemQName -> do
139 let (elemAttrs, elemChilds) =
140 (`Seq.spanl` xs) $ \case
141 Tree (Sourced _ NodeAttr{}) _ -> True
142 _ -> False
143 let (usedNS, declNS) ::
144 ( HS.HashSet Namespace
145 , Namespaces NCName
146 ) =
147 foldl' go (initUsedNS, initDeclNS) elemAttrs
148 where
149 initUsedNS
150 | qNameSpace elemQName == xmlns_empty = mempty
151 | otherwise = HS.singleton $ qNameSpace elemQName
152 initDeclNS = def{namespaces_default = namespaces_default $ reader_ns_scope ro}
153 go (!uNS, !dNS) = \case
154 Tree (Sourced _ (NodeAttr QName{..})) vs
155 -- xmlns:prefix="namespace"
156 | qNameSpace == xmlns_xmlns
157 , [Tree (Sourced _ (NodeText t)) _] <- toList vs ->
158 let n = flatText t in
159 (uNS,) dNS
160 { namespaces_prefixes =
161 (if TL.null n
162 then HM.delete
163 -- NOTE: empty namespace means removal of the prefix from scope.
164 else (`HM.insert` qNameLocal))
165 (Namespace n)
166 (namespaces_prefixes dNS)
167 }
168 -- xmlns="namespace"
169 | qNameSpace == xmlns_empty
170 , qNameLocal == NCName "xmlns"
171 , [Tree (Sourced _ (NodeText t)) _] <- toList vs ->
172 (uNS,)
173 dNS{namespaces_default = Namespace $ flatText t}
174 -- name="value"
175 | qNameSpace == xmlns_empty -> (uNS, dNS)
176 -- {namespace}name="value"
177 | otherwise -> (HS.insert qNameSpace uNS, dNS)
178 _ -> (uNS, dNS)
179 let inhNS =
180 -- NOTE: the inherited namespaces,
181 -- including those declared at this element.
182 HM.union
183 (namespaces_prefixes declNS)
184 (namespaces_prefixes (reader_ns_scope ro))
185 let autoNS =
186 -- NOTE: the namespaces used but not declared nor default,
187 -- with fresh prefixes.
188 HM.delete (namespaces_default declNS) $
189 (`S.evalState` HS.empty) $
190 traverse
191 (\() -> S.gets freshNCName)
192 (HS.toMap usedNS `HM.difference` inhNS)
193 let autoAttrs =
194 -- NOTE: XMLify autoNS
195 HM.foldlWithKey'
196 (\acc (Namespace v) p ->
197 (acc Seq.|>) $
198 Tree (notSourced $ NodeAttr QName{qNameSpace=xmlns_xmlns, qNameLocal=p}) $
199 pure $ tree0 $ notSourced $ NodeText $ pure $ TextLexemePlain v
200 ) mempty autoNS
201 let scopeNS = declNS { namespaces_prefixes = autoNS <> inhNS }
202 return $
203 let build_elemPName = build $ prefixifyQName scopeNS elemQName in
204 let build_elemAttrs =
205 (`foldMap` (autoAttrs <> elemAttrs)) $ \case
206 Tree (Sourced _ (NodeAttr an)) vs
207 | [Tree (Sourced _ (NodeText av)) _] <- toList vs ->
208 " "<>buildAttr (prefixifyQName scopeNS{namespaces_default=""} an) av
209 _ -> mempty in
210 reader_indent ro
211 <> "<"<>build_elemPName
212 <> build_elemAttrs <>
213 let build_elemChilds = write elemChilds
214 `R.runReader` ro
215 { reader_ns_scope = scopeNS
216 , reader_indent = reader_indent ro <> build (reader_indent_delta ro)
217 } in
218 if null elemChilds
219 then "/>"
220 else ">"
221 <> build_elemChilds
222 <> (
223 if TL.null (reader_indent_delta ro)
224 || noIndent elemChilds
225 then mempty
226 else reader_indent ro
227 )
228 <> "</"<>build_elemPName<>">"
229 where
230 noIndent =
231 all $ \case
232 Tree (Sourced _ (NodeText _txt)) _ts -> True
233 _ -> False
234 NodePI pn pv
235 | pn == "xml" -> do
236 write_xs <- write xs
237 return $
238 "<?"<>build pn<>s<>write_xs<>"?>"
239 | otherwise ->
240 return $
241 reader_indent ro <>
242 "<?"<>build pn<>s<>build pv<>"?>"
243 where s | TL.null pv = ""
244 | otherwise = " "
245 NodeText t -> do
246 return $
247 if reader_no_text ro
248 then mempty
249 else build t
250
251 buildAttr :: PName -> Text -> TLB.Builder
252 buildAttr n v = build n<>"=\""<>buildAttrValue v<>"\""
253
254 buildAttrValue :: Text -> TLB.Builder
255 buildAttrValue = foldMap $ \case
256 TextLexemePlain p -> build p
257 TextLexemeEntityRef EntityRef{..} ->
258 build $ TL.replace "\"" "&quot;" entityRef_value
259 TextLexemeCharRef (CharRef c)
260 | c == '\"' -> "&quot;"
261 | otherwise -> build c
262
263 removeSpaces :: XMLs -> XMLs
264 removeSpaces xs =
265 if (`all` xs) $ \case
266 Tree (Sourced _ (NodeText txt)) _ts ->
267 all (\case
268 TextLexemePlain t -> TL.all Char.isSpace t
269 _ -> False) txt
270 _ -> True
271 then (`Seq.filter` xs) $ \case
272 Tree (Sourced _ NodeText{}) _ts -> False
273 _ -> True
274 else xs