]> Git — Sourcephile - haskell/symantic-xml.git/blob - Language/Symantic/XML/Document.hs
XML: fix union of EscapedText
[haskell/symantic-xml.git] / Language / Symantic / XML / Document.hs
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
12 , TS.Tree(..)
13 , TS.Trees
14 , TS.tree0
15 ) where
16
17 import Control.Applicative (Applicative(..), Alternative(..))
18 import Data.Bool
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
26 import Data.Int (Int)
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
46
47 -- * Type 'XML'
48 type XML = TS.Tree (Sourced FileSource Node)
49 type XMLs = Seq XML
50
51 -- | Unify two 'XMLs', merging border 'NodeText's if any.
52 union :: XMLs -> XMLs -> XMLs
53 union x y =
54 case (Seq.viewr x, Seq.viewl y) of
55 (xs Seq.:> x0, y0 Seq.:< ys) ->
56 case (x0,y0) of
57 ( Tree0 (Sourced ssx@(FileRange {fileRange_file=fx}:|_sx) (NodeText tx))
58 , Tree0 (Sourced ssy@(FileRange {fileRange_file=fy}:|_sy) (NodeText ty)) ) | fx == fy ->
59 xs `union`
60 Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced ssx tx <> Sourced ssy ty) `union`
61 ys
62 _ -> x <> y
63 (Seq.EmptyR, _) -> y
64 (_, Seq.EmptyL) -> x
65
66 unions :: Foldable f => f XMLs -> XMLs
67 unions = foldl' union mempty
68
69 pattern Tree0 :: a -> TS.Tree a
70 pattern Tree0 a <- TS.Tree a (null -> True)
71 where Tree0 a = TS.Tree a Seq.empty
72
73 -- ** Type 'Node'
74 data Node
75 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
76 | NodeAttr QName -- ^ Node with a 'NodeText' child.
77 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
78 | NodeText EscapedText -- ^ Leaf.
79 | NodeComment TL.Text -- ^ Leaf.
80 | NodeCDATA TL.Text -- ^ Leaf.
81 deriving (Eq, Ord, Show)
82
83 -- ** Type 'EscapedText'
84 newtype EscapedText = EscapedText (Seq Escaped)
85 deriving (Eq, Ord, Show)
86
87 escapeText :: TL.Text -> EscapedText
88 escapeText s =
89 EscapedText $
90 case TL.span (`List.notElem` ("<>&'\""::String)) s of
91 (t, r) | TL.null t -> escape r
92 | otherwise -> EscapedPlain t Seq.<| escape r
93 where
94 escape t = case TL.uncons t of
95 Nothing -> mempty
96 Just (c, cs) -> escapeChar c Seq.<| et where EscapedText et = escapeText cs
97
98 escapeChar :: Char -> Escaped
99 escapeChar c =
100 case c of
101 '<' -> EscapedEntityRef entityRef_lt
102 '>' -> EscapedEntityRef entityRef_gt
103 '&' -> EscapedEntityRef entityRef_amp
104 '\'' -> EscapedEntityRef entityRef_apos
105 '"' -> EscapedEntityRef entityRef_quot
106 _ -> EscapedPlain $ TL.singleton c
107
108 unescapeText :: EscapedText -> TL.Text
109 unescapeText (EscapedText et) = (`foldMap` et) $ \case
110 EscapedPlain t -> t
111 EscapedEntityRef EntityRef{..} -> entityRef_value
112 EscapedCharRef (CharRef c) -> TL.singleton c
113
114 instance Semigroup EscapedText where
115 EscapedText x <> EscapedText y =
116 case (x,y) of
117 (xl Seq.:|> EscapedPlain xr, EscapedPlain yl Seq.:<|yr) ->
118 (EscapedText $ xl Seq.|> EscapedPlain (xr<>yl)) <> EscapedText yr
119 _ -> EscapedText $ x <> y
120 instance Monoid EscapedText where
121 mempty = EscapedText mempty
122 mappend = (<>)
123
124 -- *** Type 'Escaped'
125 -- | 'EscapedText' lexemes.
126 data Escaped
127 = EscapedPlain TL.Text
128 | EscapedEntityRef EntityRef
129 | EscapedCharRef CharRef
130 deriving (Eq, Ord, Show)
131
132 -- *** Type 'EntityRef'
133 data EntityRef = EntityRef
134 { entityRef_name :: NCName
135 , entityRef_value :: TL.Text
136 } deriving (Eq, Ord, Show)
137
138 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
139 entityRef_lt = EntityRef (NCName "lt") "<"
140 entityRef_gt = EntityRef (NCName "gt") ">"
141 entityRef_amp = EntityRef (NCName "amp") "&"
142 entityRef_quot = EntityRef (NCName "quot") "\""
143 entityRef_apos = EntityRef (NCName "apos") "'"
144
145 -- *** Type 'CharRef'
146 newtype CharRef = CharRef Char
147 deriving (Eq, Ord, Show)
148
149 -- ** Type 'Name'
150 newtype Name = Name { unName :: TL.Text }
151 deriving (Eq, Ord, Hashable)
152 instance Show Name where
153 showsPrec _p = showString . TL.unpack . unName
154 instance IsString Name where
155 fromString s
156 | c:cs <- s
157 , XC.isXmlNameStartChar c
158 && all XC.isXmlNameChar cs
159 = Name (TL.pack s)
160 | otherwise = error $ "Invalid XML Name: "<>show s
161
162 -- ** Type 'Namespace'
163 newtype Namespace = Namespace { unNamespace :: TL.Text }
164 deriving (Eq, Ord, Show, Hashable)
165 instance IsString Namespace where
166 fromString s =
167 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
168 then Namespace (fromString s)
169 else error $ "Invalid XML Namespace: "<>show s
170
171 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
172 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
173 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
174 xmlns_empty = Namespace ""
175
176 -- * Type 'Namespaces'
177 data Namespaces prefix = Namespaces
178 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
179 , namespaces_default :: Namespace
180 } deriving (Show)
181 instance Default (Namespaces NCName) where
182 def = Namespaces
183 { namespaces_prefixes = HM.fromList
184 [ (xmlns_xml , "xml")
185 , (xmlns_xmlns, "xmlns")
186 ]
187 , namespaces_default = ""
188 }
189 instance Default (Namespaces (Maybe NCName)) where
190 def = Namespaces
191 { namespaces_prefixes = HM.fromList
192 [ (xmlns_xml , Just "xml")
193 , (xmlns_xmlns, Just "xmlns")
194 ]
195 , namespaces_default = ""
196 }
197 instance Semigroup (Namespaces NCName) where
198 x <> y = Namespaces
199 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
200 , namespaces_default = namespaces_default x
201 }
202 instance Semigroup (Namespaces (Maybe NCName)) where
203 x <> y = Namespaces
204 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
205 , namespaces_default = namespaces_default x
206 }
207 instance Monoid (Namespaces NCName) where
208 mempty = def
209 mappend = (<>)
210 instance Monoid (Namespaces (Maybe NCName)) where
211 mempty = def
212 mappend = (<>)
213
214 prefixifyQName :: Namespaces NCName -> QName -> PName
215 prefixifyQName Namespaces{..} QName{..} =
216 PName
217 { pNameSpace =
218 if qNameSpace == namespaces_default
219 then Nothing
220 else HM.lookup qNameSpace namespaces_prefixes
221 , pNameLocal = qNameLocal
222 }
223
224 -- ** Type 'NCName'
225 -- | Non-colonized name.
226 newtype NCName = NCName { unNCName :: TL.Text }
227 deriving (Eq, Ord, Hashable)
228 instance Show NCName where
229 showsPrec _p = showString . TL.unpack . unNCName
230 instance IsString NCName where
231 fromString s =
232 fromMaybe (error $ "Invalid XML NCName: "<>show s) $
233 ncName (TL.pack s)
234
235 ncName :: TL.Text -> Maybe NCName
236 ncName t =
237 case TL.uncons t of
238 Just (c, cs)
239 | XC.isXmlNCNameStartChar c
240 , TL.all XC.isXmlNCNameChar cs
241 -> Just (NCName t)
242 _ -> Nothing
243
244 poolNCNames :: [NCName]
245 poolNCNames =
246 [ NCName $ TL.pack ("ns"<>show i)
247 | i <- [1 :: Int ..]
248 ]
249
250 freshNCName :: HS.HashSet NCName -> NCName
251 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
252
253 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
254 freshifyNCName ns (NCName n) =
255 let ints = [1..] :: [Int] in
256 List.head
257 [ fresh
258 | suffix <- mempty : (show <$> ints)
259 , fresh <- [ NCName $ n <> TL.pack suffix]
260 , not $ fresh `HS.member` ns
261 ]
262
263 -- ** Type 'PName'
264 -- | Prefixed name.
265 data PName = PName
266 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
267 , pNameLocal :: NCName -- ^ eg. "stylesheet"
268 } deriving (Eq, Ord, Generic)
269 instance Show PName where
270 showsPrec p PName{pNameSpace=Nothing, ..} =
271 showsPrec p pNameLocal
272 showsPrec _p PName{pNameSpace=Just p, ..} =
273 showsPrec 10 p .
274 showChar ':' .
275 showsPrec 10 pNameLocal
276 instance IsString PName where
277 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
278 fromString s =
279 case List.break (== ':') s of
280 (_, "") -> PName Nothing $ fromString s
281 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
282 instance Hashable PName
283
284 pName :: NCName -> PName
285 pName = PName Nothing
286 {-# INLINE pName #-}
287
288 -- ** Type 'QName'
289 -- | Qualified name.
290 data QName = QName
291 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
292 , qNameLocal :: NCName -- ^ eg. "stylesheet"
293 } deriving (Eq, Ord, Generic)
294 instance Show QName where
295 showsPrec _p QName{..} =
296 (if TL.null $ unNamespace qNameSpace then id
297 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
298 ) . showsPrec 10 qNameLocal
299 instance IsString QName where
300 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
301 fromString full@('{':rest) =
302 case List.break (== '}') rest of
303 (_, "") -> error $ "Invalid XML Clark notation: "<>show full
304 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
305 fromString local = QName "" $ fromString local
306 instance Hashable QName
307
308 qName :: NCName -> QName
309 qName = QName (Namespace "")
310 {-# INLINE qName #-}
311
312 -- * Type 'Sourced'
313 data Sourced src a
314 = Sourced
315 { source :: src
316 , unSourced :: a
317 } deriving (Eq, Ord, Functor)
318 instance (Show src, Show a) => Show (Sourced src a) where
319 showsPrec p Sourced{..} =
320 showParen (p > 10) $
321 showsPrec 11 unSourced .
322 showString " @" . showsPrec 10 source
323 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
324 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
325 Sourced (FileRange fx bx ey :| lx) $
326 x<>fromPad (FilePos lines columns)<>y
327 where
328 lines = filePos_line by - filePos_line ex
329 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
330
331 -- ** Class 'FromPad'
332 class FromPad a where
333 fromPad :: FilePos -> a
334 instance FromPad T.Text where
335 fromPad FilePos{..} =
336 T.replicate filePos_line "\n" <>
337 T.replicate filePos_column " "
338 instance FromPad TL.Text where
339 fromPad FilePos{..} =
340 TL.replicate (fromIntegral filePos_line) "\n" <>
341 TL.replicate (fromIntegral filePos_column) " "
342 instance FromPad EscapedText where
343 fromPad = EscapedText . pure . fromPad
344 instance FromPad Escaped where
345 fromPad = EscapedPlain . fromPad
346
347 -- ** Class 'NoSource'
348 class NoSource src where
349 noSource :: src
350 instance NoSource FileSource where
351 noSource = noSource :| []
352 instance NoSource FileRange where
353 noSource = FileRange "" filePos1 filePos1
354 {-
355 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
356 mempty = sourced0 mempty
357 mappend = (<>)
358 -}
359
360 notSourced :: NoSource src => a -> Sourced src a
361 notSourced = Sourced noSource
362
363 -- * Type 'FileSource'
364 type FileSource = NonEmpty FileRange
365
366 -- ** Type 'FileRange'
367 data FileRange
368 = FileRange
369 { fileRange_file :: FilePath
370 , fileRange_begin :: FilePos
371 , fileRange_end :: FilePos
372 } deriving (Eq, Ord)
373 instance Default FileRange where
374 def = FileRange "" filePos1 filePos1
375 instance Show FileRange where
376 showsPrec _p FileRange{..} =
377 showString fileRange_file .
378 showChar '#' . showsPrec 10 fileRange_begin .
379 showChar '-' . showsPrec 10 fileRange_end
380
381 -- *** Type 'FilePos'
382 -- | Absolute text file position.
383 data FilePos = FilePos
384 { filePos_line :: {-# UNPACK #-} LineNum
385 , filePos_column :: {-# UNPACK #-} ColNum
386 } deriving (Eq, Ord)
387 instance Default FilePos where
388 def = filePos1
389 instance Show FilePos where
390 showsPrec _p FilePos{..} =
391 showsPrec 11 filePos_line .
392 showChar ':' .
393 showsPrec 11 filePos_column
394
395 filePos1 :: FilePos
396 filePos1 = FilePos 1 1
397
398 -- **** Type 'LineNum'
399 type LineNum = Int
400
401 -- **** Type 'ColNum'
402 type ColNum = Int