1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Symantic.XML.Document
11 ( module Symantic.XML.Document
17 import Control.Applicative (Alternative(..))
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((+), error)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text.Lazy as TL
44 import qualified Data.TreeSeq.Strict as TS
47 type XML src = TS.Tree (Sourced src Node)
48 type XMLs src = Seq (XML src)
50 -- | Unify two 'XMLs', merging border 'NodeText's if any.
51 union :: Semigroup (Sourced src EscapedText) => XMLs src -> XMLs src -> XMLs src
53 case (Seq.viewr x, Seq.viewl y) of
54 (xs Seq.:> x0, y0 Seq.:< ys) ->
56 ( Tree0 (Sourced sx (NodeText tx))
57 , Tree0 (Sourced sy (NodeText ty)) ) ->
59 Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced sx tx <> Sourced sy ty) `union`
66 Semigroup (Sourced src EscapedText) =>
67 Foldable f => f (XMLs src) -> XMLs src
68 unions = foldl' union mempty
70 pattern Tree0 :: a -> TS.Tree a
71 pattern Tree0 a <- TS.Tree a (null -> True)
72 where Tree0 a = TS.Tree a Seq.empty
76 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
77 | NodeAttr QName -- ^ Node with a 'NodeText' child.
78 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
79 | NodeText EscapedText -- ^ Leaf.
80 | NodeComment TL.Text -- ^ Leaf.
81 | NodeCDATA TL.Text -- ^ Leaf.
82 deriving (Eq, Ord, Show)
84 -- ** Type 'EscapedText'
85 newtype EscapedText = EscapedText (Seq Escaped)
86 deriving (Eq, Ord, Show)
88 escapeText :: TL.Text -> EscapedText
91 case TL.span (`List.notElem` ("<>&'\""::String)) s of
92 (t, r) | TL.null t -> escape r
93 | otherwise -> EscapedPlain t Seq.<| escape r
95 escape t = case TL.uncons t of
97 Just (c, cs) -> escapeChar c Seq.<| et where EscapedText et = escapeText cs
99 escapeChar :: Char -> Escaped
102 '<' -> EscapedEntityRef entityRef_lt
103 '>' -> EscapedEntityRef entityRef_gt
104 '&' -> EscapedEntityRef entityRef_amp
105 '\'' -> EscapedEntityRef entityRef_apos
106 '"' -> EscapedEntityRef entityRef_quot
107 _ -> EscapedPlain $ TL.singleton c
109 unescapeText :: EscapedText -> TL.Text
110 unescapeText (EscapedText et) = (`foldMap` et) $ \case
112 EscapedEntityRef EntityRef{..} -> entityRef_value
113 EscapedCharRef (CharRef c) -> TL.singleton c
115 instance Semigroup EscapedText where
116 EscapedText x <> EscapedText y =
118 (xl Seq.:|> EscapedPlain xr, EscapedPlain yl Seq.:<|yr) ->
119 (EscapedText $ xl Seq.|> EscapedPlain (xr<>yl)) <> EscapedText yr
120 _ -> EscapedText $ x <> y
121 instance Monoid EscapedText where
122 mempty = EscapedText mempty
125 -- *** Type 'Escaped'
126 -- | 'EscapedText' lexemes.
128 = EscapedPlain TL.Text
129 | EscapedEntityRef EntityRef
130 | EscapedCharRef CharRef
131 deriving (Eq, Ord, Show)
133 -- *** Type 'EntityRef'
134 data EntityRef = EntityRef
135 { entityRef_name :: NCName
136 , entityRef_value :: TL.Text
137 } deriving (Eq, Ord, Show)
139 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
140 entityRef_lt = EntityRef (NCName "lt") "<"
141 entityRef_gt = EntityRef (NCName "gt") ">"
142 entityRef_amp = EntityRef (NCName "amp") "&"
143 entityRef_quot = EntityRef (NCName "quot") "\""
144 entityRef_apos = EntityRef (NCName "apos") "'"
146 -- *** Type 'CharRef'
147 newtype CharRef = CharRef Char
148 deriving (Eq, Ord, Show)
151 newtype Name = Name { unName :: TL.Text }
152 deriving (Eq, Ord, Hashable)
153 instance Show Name where
154 showsPrec _p = showString . TL.unpack . unName
155 instance IsString Name where
158 , XC.isXmlNameStartChar c
159 && all XC.isXmlNameChar cs
161 | otherwise = error $ "Invalid XML Name: "<>show s
163 -- ** Type 'Namespace'
164 newtype Namespace = Namespace { unNamespace :: TL.Text }
165 deriving (Eq, Ord, Show, Hashable)
166 instance IsString Namespace where
168 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
169 then Namespace (fromString s)
170 else error $ "Invalid XML Namespace: "<>show s
172 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
173 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
174 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
175 xmlns_empty = Namespace ""
177 -- * Type 'Namespaces'
178 data Namespaces prefix = Namespaces
179 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
180 , namespaces_default :: Namespace
182 instance Default (Namespaces NCName) where
184 { namespaces_prefixes = HM.fromList
185 [ (xmlns_xml , "xml")
186 , (xmlns_xmlns, "xmlns")
188 , namespaces_default = ""
190 instance Default (Namespaces (Maybe NCName)) where
192 { namespaces_prefixes = HM.fromList
193 [ (xmlns_xml , Just "xml")
194 , (xmlns_xmlns, Just "xmlns")
196 , namespaces_default = ""
198 instance Semigroup (Namespaces NCName) where
200 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
201 , namespaces_default = namespaces_default x
203 instance Semigroup (Namespaces (Maybe NCName)) where
205 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
206 , namespaces_default = namespaces_default x
208 instance Monoid (Namespaces NCName) where
211 instance Monoid (Namespaces (Maybe NCName)) where
215 prefixifyQName :: Namespaces NCName -> QName -> PName
216 prefixifyQName Namespaces{..} QName{..} =
219 if qNameSpace == namespaces_default
221 else HM.lookup qNameSpace namespaces_prefixes
222 , pNameLocal = qNameLocal
226 -- | Non-colonized name.
227 newtype NCName = NCName { unNCName :: TL.Text }
228 deriving (Eq, Ord, Hashable)
229 instance Show NCName where
230 showsPrec _p = showString . TL.unpack . unNCName
231 instance IsString NCName where
233 fromMaybe (error $ "Invalid XML NCName: "<>show s) $
236 ncName :: TL.Text -> Maybe NCName
240 | XC.isXmlNCNameStartChar c
241 , TL.all XC.isXmlNCNameChar cs
245 poolNCNames :: [NCName]
247 [ NCName $ TL.pack ("ns"<>show i)
251 freshNCName :: HS.HashSet NCName -> NCName
252 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
254 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
255 freshifyNCName ns (NCName n) =
256 let ints = [1..] :: [Int] in
259 | suffix <- mempty : (show <$> ints)
260 , fresh <- [ NCName $ n <> TL.pack suffix]
261 , not $ fresh `HS.member` ns
267 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
268 , pNameLocal :: NCName -- ^ eg. "stylesheet"
269 } deriving (Eq, Ord, Generic)
270 instance Show PName where
271 showsPrec p PName{pNameSpace=Nothing, ..} =
272 showsPrec p pNameLocal
273 showsPrec _p PName{pNameSpace=Just p, ..} =
276 showsPrec 10 pNameLocal
277 instance IsString PName where
278 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
280 case List.break (== ':') s of
281 (_, "") -> PName Nothing $ fromString s
282 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
283 instance Hashable PName
285 pName :: NCName -> PName
286 pName = PName Nothing
292 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
293 , qNameLocal :: NCName -- ^ eg. "stylesheet"
294 } deriving (Eq, Ord, Generic)
295 instance Show QName where
296 showsPrec _p QName{..} =
297 (if TL.null $ unNamespace qNameSpace then id
298 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
299 ) . showsPrec 10 qNameLocal
300 instance IsString QName where
301 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
302 fromString full@('{':rest) =
303 case List.break (== '}') rest of
304 (_, "") -> error $ "Invalid XML Clark notation: "<>show full
305 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
306 fromString local = QName "" $ fromString local
307 instance Hashable QName
309 qName :: NCName -> QName
310 qName = QName (Namespace "")
318 } deriving (Eq, Ord, Functor)
319 instance (Show src, Show a) => Show (Sourced src a) where
320 showsPrec p Sourced{..} =
322 showsPrec 11 unSourced .
323 showString " @" . showsPrec 10 source
324 instance Semigroup a => Semigroup (Sourced (FileSource Offset) a) where
326 (Sourced rx@(FileRange xf xb xe :| xs) x)
327 (Sourced (FileRange yf yb ye :| _ys) y)
328 | xf == yf && xe == yb = Sourced (FileRange xf xb ye :| xs) $ x<>y
329 | otherwise = Sourced rx (x<>y)
331 instance (FromPad a, Semigroup a) => Semigroup (Sourced (FileSource LineCol) a) where
333 (Sourced rx@(FileRange xf xb xe :| xs) x)
334 (Sourced (FileRange yf yb ye :| _ys) y)
335 | xf == yf = Sourced (FileRange xf xb ye :| xs) $ x<>fromPad (LineColumn l c)<>y
336 | otherwise = Sourced rx (x<>y)
338 l = lineNum yb - lineNum xe
339 c = colNum yb - colNum (if l <= 0 then xe else xb)
341 -- ** Class 'FromPad'
342 class FromPad a where
343 fromPad :: LineColumn -> a
344 instance FromPad T.Text where
345 fromPad LineColumn{..} =
346 T.replicate lineNum "\n" <>
347 T.replicate colNum " "
348 instance FromPad TL.Text where
349 fromPad LineColumn{..} =
350 TL.replicate (fromIntegral lineNum) "\n" <>
351 TL.replicate (fromIntegral colNum) " "
352 instance FromPad EscapedText where
353 fromPad = EscapedText . pure . fromPad
354 instance FromPad Escaped where
355 fromPad = EscapedPlain . fromPad
358 -- ** Class 'NoSource'
359 class NoSource src where
361 instance Default pos => NoSource (FileSource pos) where
362 noSource = noSource :| []
363 instance Default pos => NoSource (FileRange pos) where
364 noSource = FileRange "" def def
365 instance NoSource Offset where
366 noSource = Offset def
368 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
369 mempty = sourced0 mempty
373 notSourced :: NoSource src => a -> Sourced src a
374 notSourced = Sourced noSource
376 -- * Type 'FileSource'
377 type FileSource pos = NonEmpty (FileRange pos)
379 -- ** Type 'FileSourced'
380 type FileSourced = Sourced (FileSource Offset)
382 -- ** Type 'FileRange'
385 { fileRange_file :: FilePath
386 , fileRange_begin :: pos
387 , fileRange_end :: pos
389 instance Default (FileRange Offset) where
390 def = FileRange "" def def
391 instance Default (FileRange LineColumn) where
392 def = FileRange "" def def
393 instance Show (FileRange Offset) where
394 showsPrec _p FileRange{..} =
395 showString fileRange_file .
396 showChar '@' . showsPrec 10 fileRange_begin .
397 showChar '-' . showsPrec 10 fileRange_end
398 instance Show (FileRange LineColumn) where
399 showsPrec _p FileRange{..} =
400 showString fileRange_file .
401 showChar '#' . showsPrec 10 fileRange_begin .
402 showChar '-' . showsPrec 10 fileRange_end
405 newtype Offset = Offset Int
407 instance Show Offset where
408 showsPrec p (Offset o) = showsPrec p o
409 instance Default Offset where
411 instance Semigroup Offset where
412 Offset x <> Offset y = Offset (x+y)
413 instance Monoid Offset where
417 -- *** Type 'LineColumn'
418 -- | Absolute text file position.
419 data LineColumn = LineColumn
420 { lineNum :: {-# UNPACK #-} Offset
421 , colNum :: {-# UNPACK #-} Offset
423 instance Default LineColumn where
424 def = LineColumn def def
425 instance Show LineColumn where
426 showsPrec _p LineColumn{..} =
427 showsPrec 11 lineNum .
431 filePos1 :: LineColumn