]> Git — Sourcephile - haskell/symantic-xml.git/blob - Language/Symantic/XML/Document.hs
XML: add escapeChar
[haskell/symantic-xml.git] / Language / Symantic / XML / Document.hs
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
12 , TS.Tree(..)
13 , TS.Trees
14 , TS.tree0
15 ) where
16
17 import Control.Applicative (Alternative(..))
18 import Data.Bool
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
26 import Data.Int (Int)
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
46
47 -- * Type 'XML'
48 type XML = TS.Tree (Sourced FileSource Node)
49 type XMLs = Seq XML
50
51 pattern Tree0 :: a -> TS.Tree a
52 pattern Tree0 a <- TS.Tree a (null -> True)
53 where Tree0 a = TS.Tree a Seq.empty
54
55 -- ** Type 'Node'
56 data Node
57 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
58 | NodeAttr QName -- ^ Node with a 'NodeText' child.
59 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
60 | NodeText EscapedText -- ^ Leaf.
61 | NodeComment TL.Text -- ^ Leaf.
62 | NodeCDATA TL.Text -- ^ Leaf.
63 deriving (Eq, Ord, Show)
64
65 -- ** Type 'EscapedText'
66 type EscapedText = [Escaped]
67
68 escapeText :: TL.Text -> EscapedText
69 escapeText s =
70 case TL.span (`List.notElem` ("<>&'\""::String)) s of
71 (t, r) | TL.null t -> escape r
72 | otherwise -> EscapedPlain t : escape r
73 where
74 escape t = case TL.uncons t of
75 Nothing -> []
76 Just (c, cs) -> escapeChar c : escapeText cs
77
78 escapeChar :: Char -> Escaped
79 escapeChar c =
80 case c of
81 '<' -> EscapedEntityRef entityRef_lt
82 '>' -> EscapedEntityRef entityRef_gt
83 '&' -> EscapedEntityRef entityRef_amp
84 '\'' -> EscapedEntityRef entityRef_apos
85 '"' -> EscapedEntityRef entityRef_quot
86 _ -> EscapedPlain $ TL.singleton c
87
88 unescapeText :: EscapedText -> TL.Text
89 unescapeText = foldMap $ \case
90 EscapedPlain t -> t
91 EscapedEntityRef EntityRef{..} -> entityRef_value
92 EscapedCharRef (CharRef c) -> TL.singleton c
93
94 -- *** Type 'Escaped'
95 -- | 'EscapedText' lexemes.
96 data Escaped
97 = EscapedPlain TL.Text
98 | EscapedEntityRef EntityRef
99 | EscapedCharRef CharRef
100 deriving (Eq, Ord, Show)
101
102 -- *** Type 'EntityRef'
103 data EntityRef = EntityRef
104 { entityRef_name :: NCName
105 , entityRef_value :: TL.Text
106 } deriving (Eq, Ord, Show)
107
108 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
109 entityRef_lt = EntityRef (NCName "lt") "<"
110 entityRef_gt = EntityRef (NCName "gt") ">"
111 entityRef_amp = EntityRef (NCName "amp") "&"
112 entityRef_quot = EntityRef (NCName "quot") "\""
113 entityRef_apos = EntityRef (NCName "apos") "'"
114
115 -- *** Type 'CharRef'
116 newtype CharRef = CharRef Char
117 deriving (Eq, Ord, Show)
118
119 -- ** Type 'Name'
120 newtype Name = Name { unName :: TL.Text }
121 deriving (Eq, Ord, Hashable)
122 instance Show Name where
123 showsPrec _p = showString . TL.unpack . unName
124 instance IsString Name where
125 fromString s
126 | c:cs <- s
127 , XC.isXmlNameStartChar c
128 && all XC.isXmlNameChar cs
129 = Name (TL.pack s)
130 | otherwise = error "Invalid XML Name"
131
132 -- ** Type 'Namespace'
133 newtype Namespace = Namespace { unNamespace :: TL.Text }
134 deriving (Eq, Ord, Show, Hashable)
135 instance IsString Namespace where
136 fromString s =
137 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
138 then Namespace (fromString s)
139 else error $ "Invalid XML Namespace: " <> show s
140
141 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
142 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
143 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
144 xmlns_empty = Namespace ""
145
146 -- * Type 'Namespaces'
147 data Namespaces prefix = Namespaces
148 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
149 , namespaces_default :: Namespace
150 } deriving (Show)
151 instance Default (Namespaces NCName) where
152 def = Namespaces
153 { namespaces_prefixes = HM.fromList
154 [ (xmlns_xml , "xml")
155 , (xmlns_xmlns, "xmlns")
156 ]
157 , namespaces_default = ""
158 }
159 instance Default (Namespaces (Maybe NCName)) where
160 def = Namespaces
161 { namespaces_prefixes = HM.fromList
162 [ (xmlns_xml , Just "xml")
163 , (xmlns_xmlns, Just "xmlns")
164 ]
165 , namespaces_default = ""
166 }
167 instance Semigroup (Namespaces NCName) where
168 x <> y = Namespaces
169 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
170 , namespaces_default = namespaces_default x
171 }
172 instance Semigroup (Namespaces (Maybe NCName)) where
173 x <> y = Namespaces
174 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
175 , namespaces_default = namespaces_default x
176 }
177 instance Monoid (Namespaces NCName) where
178 mempty = def
179 mappend = (<>)
180 instance Monoid (Namespaces (Maybe NCName)) where
181 mempty = def
182 mappend = (<>)
183
184 prefixifyQName :: Namespaces NCName -> QName -> PName
185 prefixifyQName Namespaces{..} QName{..} =
186 PName
187 { pNameSpace =
188 if qNameSpace == namespaces_default
189 then Nothing
190 else HM.lookup qNameSpace namespaces_prefixes
191 , pNameLocal = qNameLocal
192 }
193
194 -- ** Type 'NCName'
195 -- | Non-colonized name.
196 newtype NCName = NCName { unNCName :: TL.Text }
197 deriving (Eq, Ord, Hashable)
198 instance Show NCName where
199 showsPrec _p = showString . TL.unpack . unNCName
200 instance IsString NCName where
201 fromString s =
202 fromMaybe (error "Invalid XML NCName") $
203 ncName (TL.pack s)
204
205 ncName :: TL.Text -> Maybe NCName
206 ncName t =
207 case TL.uncons t of
208 Just (c, cs)
209 | XC.isXmlNCNameStartChar c
210 , TL.all XC.isXmlNCNameChar cs
211 -> Just (NCName t)
212 _ -> Nothing
213
214 poolNCNames :: [NCName]
215 poolNCNames =
216 [ NCName $ TL.pack ("ns"<>show i)
217 | i <- [1 :: Int ..]
218 ]
219
220 freshNCName :: HS.HashSet NCName -> NCName
221 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
222
223 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
224 freshifyNCName ns (NCName n) =
225 let ints = [1..] :: [Int] in
226 List.head
227 [ fresh
228 | suffix <- mempty : (show <$> ints)
229 , fresh <- [ NCName $ n <> TL.pack suffix]
230 , not $ fresh `HS.member` ns
231 ]
232
233 -- ** Type 'PName'
234 -- | Prefixed name.
235 data PName = PName
236 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
237 , pNameLocal :: NCName -- ^ eg. "stylesheet"
238 } deriving (Eq, Ord, Generic)
239 instance Show PName where
240 showsPrec p PName{pNameSpace=Nothing, ..} =
241 showsPrec p pNameLocal
242 showsPrec _p PName{pNameSpace=Just p, ..} =
243 showsPrec 10 p .
244 showChar ':' .
245 showsPrec 10 pNameLocal
246 instance IsString PName where
247 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
248 fromString s =
249 case List.break (== ':') s of
250 (_, "") -> PName Nothing $ fromString s
251 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
252 instance Hashable PName
253
254 pName :: NCName -> PName
255 pName = PName Nothing
256 {-# INLINE pName #-}
257
258 -- ** Type 'QName'
259 -- | Qualified name.
260 data QName = QName
261 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
262 , qNameLocal :: NCName -- ^ eg. "stylesheet"
263 } deriving (Eq, Ord, Generic)
264 instance Show QName where
265 showsPrec _p QName{..} =
266 (if TL.null $ unNamespace qNameSpace then id
267 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
268 ) . showsPrec 10 qNameLocal
269 instance IsString QName where
270 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
271 fromString full@('{':rest) =
272 case List.break (== '}') rest of
273 (_, "") -> error ("Invalid XML Clark notation: " <> show full)
274 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
275 fromString local = QName "" $ fromString local
276 instance Hashable QName
277
278 qName :: NCName -> QName
279 qName = QName (Namespace "")
280 {-# INLINE qName #-}
281
282 -- * Type 'Sourced'
283 data Sourced src a
284 = Sourced
285 { source :: src
286 , unSourced :: a
287 } deriving (Eq, Ord, Functor)
288 instance (Show src, Show a) => Show (Sourced src a) where
289 showsPrec p Sourced{..} =
290 showParen (p > 10) $
291 showsPrec 11 unSourced .
292 showString " @" . showsPrec 10 source
293 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
294 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
295 Sourced (FileRange fx bx ey :| lx) $
296 x<>fromPad (FilePos lines columns)<>y
297 where
298 lines = filePos_line by - filePos_line ex
299 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
300
301 -- ** Class 'FromPad'
302 class FromPad a where
303 fromPad :: FilePos -> a
304 instance FromPad T.Text where
305 fromPad FilePos{..} =
306 T.replicate filePos_line "\n" <>
307 T.replicate filePos_column " "
308 instance FromPad TL.Text where
309 fromPad FilePos{..} =
310 TL.replicate (fromIntegral filePos_line) "\n" <>
311 TL.replicate (fromIntegral filePos_column) " "
312
313 -- ** Class 'NoSource'
314 class NoSource src where
315 noSource :: src
316 instance NoSource FileSource where
317 noSource = noSource :| []
318 instance NoSource FileRange where
319 noSource = FileRange "" filePos1 filePos1
320 {-
321 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
322 mempty = sourced0 mempty
323 mappend = (<>)
324 -}
325
326 notSourced :: NoSource src => a -> Sourced src a
327 notSourced = Sourced noSource
328
329 -- * Type 'FileSource'
330 type FileSource = NonEmpty FileRange
331
332 -- ** Type 'FileRange'
333 data FileRange
334 = FileRange
335 { fileRange_file :: FilePath
336 , fileRange_begin :: FilePos
337 , fileRange_end :: FilePos
338 } deriving (Eq, Ord)
339 instance Default FileRange where
340 def = FileRange "" filePos1 filePos1
341 instance Show FileRange where
342 showsPrec _p FileRange{..} =
343 showString fileRange_file .
344 showChar '#' . showsPrec 10 fileRange_begin .
345 showChar '-' . showsPrec 10 fileRange_end
346
347 -- *** Type 'FilePos'
348 -- | Absolute text file position.
349 data FilePos = FilePos
350 { filePos_line :: {-# UNPACK #-} LineNum
351 , filePos_column :: {-# UNPACK #-} ColNum
352 } deriving (Eq, Ord)
353 instance Default FilePos where
354 def = filePos1
355 instance Show FilePos where
356 showsPrec _p FilePos{..} =
357 showsPrec 11 filePos_line .
358 showChar ':' .
359 showsPrec 11 filePos_column
360
361 filePos1 :: FilePos
362 filePos1 = FilePos 1 1
363
364 -- **** Type 'LineNum'
365 type LineNum = Int
366
367 -- **** Type 'ColNum'
368 type ColNum = Int