+{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE Rank2Types #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+module Symantic.XML.Tree.Read where
+
+import Control.Arrow (left)
+import Control.Applicative (Applicative(..), Alternative(..))
+import Control.Monad (Monad(..), void, unless, forM)
+import Data.Bool
+import Data.Char (Char)
+import Data.Either (Either(..))
+import Data.Eq (Eq(..))
+import Data.Foldable (Foldable(..))
+import Data.Function (($), (.), const)
+import Data.Functor ((<$>), (<$))
+import Data.Maybe (Maybe(..), maybe, catMaybes)
+import Data.Monoid (Monoid(..))
+import Data.Ord (Ord(..))
+import Data.Semigroup (Semigroup(..))
+import Data.List.NonEmpty (NonEmpty(..))
+import Data.String (String, IsString(..))
+import Prelude (Num(..), Enum(..), Bounded(..), Integer, toInteger)
+import System.IO (FilePath, IO)
+import Text.Show (Show(..))
+import qualified Control.Exception as Exn
+import qualified Control.Monad.Trans.Reader as R
+import qualified Data.ByteString.Lazy as BSL
+import qualified Data.Char as Char
+import qualified Data.Char.Properties.XMLCharProps as XC
+import qualified Data.HashMap.Strict as HM
+import qualified Data.List as List
+import qualified Data.Set as Set
+import qualified Data.Sequence as Seq
+import qualified Data.Text.Encoding.Error as TL
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Encoding as TL
+import qualified Data.TreeSeq.Strict as TS
+import qualified System.IO.Error as IO
+import qualified Text.Megaparsec as P
+import qualified Text.Megaparsec.Char as P
+
+import Symantic.Base ()
+import Symantic.XML.Language hiding (void)
+import Symantic.XML.Tree.Source
+import Symantic.XML.Tree.Data
+
+readTree :: FilePath -> IO (Either String FileSourcedTrees)
+readTree path =
+ readUtf8 path >>= \case
+ Left err -> return $ Left $ show err
+ Right txt -> return $
+ case runReadTree path txt of
+ Right a -> Right a
+ Left err -> Left $ P.errorBundlePretty err
+
+runReadTree ::
+ FilePath -> TL.Text ->
+ Either (P.ParseErrorBundle TL.Text Error)
+ FileSourcedTrees
+runReadTree = P.runParser $ R.runReaderT p_document defaultReadTreeInh
+
+-- * Type 'ErrorRead'
+data ErrorRead
+ = ErrorRead_IO IO.IOError
+ | ErrorRead_Unicode TL.UnicodeException
+ deriving (Show)
+readUtf8 :: FilePath -> IO (Either ErrorRead TL.Text)
+readUtf8 path =
+ (left ErrorRead_Unicode . TL.decodeUtf8' <$> BSL.readFile path)
+ `Exn.catch` \e ->
+ if IO.isAlreadyInUseError e
+ || IO.isDoesNotExistError e
+ || IO.isPermissionError e
+ then return $ Left $ ErrorRead_IO e
+ else IO.ioError e
+
+-- * Type 'ReadTree'
+-- | Convenient alias.
+type ReadTree e s a =
+ ReadTreeConstraints e s a =>
+ R.ReaderT ReadTreeInh (P.Parsec e s) a
+
+-- ** Type 'ReadTreeConstraints'
+type ReadTreeConstraints e s a =
+ ( P.Stream s
+ , P.Token s ~ Char
+ , Ord e
+ , IsString (P.Tokens s)
+ , P.ShowErrorComponent e
+ )
+
+-- ** Type 'ReadTreeInh'
+data ReadTreeInh
+ = ReadTreeInh
+ { readTreeInh_source :: FileSource Offset
+ , readTreeInh_ns_scope :: HM.HashMap NCName Namespace
+ , readTreeInh_ns_default :: Namespace
+ } deriving (Show)
+
+defaultReadTreeInh :: ReadTreeInh
+defaultReadTreeInh = ReadTreeInh
+ { readTreeInh_source = FileSource $ pure $
+ FileRange mempty mempty mempty
+ , readTreeInh_ns_scope = HM.fromList
+ [ ("xml" , xmlns_xml)
+ , ("xmlns", xmlns_xmlns)
+ ]
+ , readTreeInh_ns_default = ""
+ }
+
+p_Offset :: ReadTree e s Offset
+p_Offset = Offset <$> P.getOffset
+{-# INLINE p_Offset #-}
+
+p_Sourced :: ReadTree e s a -> ReadTree e s (Sourced (FileSource Offset) a)
+p_Sourced pa = do
+ ReadTreeInh{readTreeInh_source} <- R.ask
+ b <- P.getParserState
+ let fileRange_path = P.sourceName $ P.pstateSourcePos $ P.statePosState b
+ let fileRange_begin = Offset $ P.stateOffset b
+ a <- pa
+ e <- P.getParserState
+ let fileRange_end = Offset $ P.stateOffset e
+ return $ Sourced (setSource FileRange{..} readTreeInh_source) a
+
+setSource :: FileRange pos -> FileSource pos -> FileSource pos
+setSource fileRange (FileSource (_curr:|next)) = FileSource (fileRange:|next)
+
+-- | Like 'p_Sourced' but uncoupled (through the use of 'p_SourcedEnd') for more flexibility.
+p_SourcedBegin :: ReadTree e s a -> ReadTree e s a
+p_SourcedBegin pa = do
+ b <- P.getParserState
+ let fileRange_path = P.sourceName $ P.pstateSourcePos $ P.statePosState b
+ let fileRange_begin = Offset $ P.stateOffset b
+ let fileRange_end = fileRange_begin
+ (`R.local` pa) $ \inh@ReadTreeInh{..} ->
+ inh{ readTreeInh_source = setSource FileRange{..} readTreeInh_source }
+
+-- | WARNING: only to be used within a 'p_SourcedBegin'.
+p_SourcedEnd :: ReadTree e s (a -> Sourced (FileSource Offset) a)
+p_SourcedEnd = do
+ ReadTreeInh{..} <- R.ask
+ e <- P.getParserState
+ let fileRange_end = Offset $ P.stateOffset e
+ return $ Sourced $
+ (\(FileSource (curr:|path)) -> FileSource (curr{fileRange_end}:|path))
+ readTreeInh_source
+
+-- * Type 'Error'
+data Error
+ = Error_CharRef_invalid Integer
+ -- ^ Well-formedness constraint: Legal Character.
+ --
+ -- Characters referred to using character references MUST match the production for Char.
+ | Error_EntityRef_unknown NCName
+ -- ^ Well-formedness constraint: Entity Declared
+ --
+ -- In a document without any DTD, a document with only an internal DTD
+ -- subset which contains no parameter entity references, or a document
+ -- with " standalone='yes' ", for an entity reference that does not occur
+ -- within the external subset or a parameter entity, the Name given in the
+ -- entity reference MUST match that in an entity declaration that does not
+ -- occur within the external subset or a parameter entity, except that
+ -- well-formed documents need not declare any of the following entities:
+ -- amp, lt, gt, apos, quot. The declaration of a general entity MUST
+ -- precede any reference to it which appears in a default value in an
+ -- attribute-list declaration.
+ --
+ -- Note that non-validating processors are not obligated to read and
+ -- process entity declarations occurring in parameter entities or in the
+ -- external subset; for such documents, the define that an entity must be
+ -- declared is a well-formedness constraint only if standalone='yes'.
+ | Error_Closing_tag_unexpected QName QName
+ -- ^ Well-formedness constraint: Element Type Match.
+ --
+ -- The Name in an element's end-tag MUST match the element type in the start-tag.
+ | Error_Attribute_collision QName
+ -- ^ Well-formedness constraint: Unique Att Spec.
+ --
+ -- An attribute name MUST NOT appear more than once in the same start-tag or empty-element tag.
+ | Error_PI_reserved PName
+ -- ^ The target names " XML ", " xml ", and so on are reserved for standardization.
+ | Error_Namespace_prefix_unknown NCName
+ -- ^ Namespace constraint: Prefix Declared
+ --
+ -- The namespace prefix, unless it is xml or xmlns, MUST have been declared in a namespace declaration attribute in either the start-tag of the element where the prefix is used or in an ancestor element (i.e., an element in whose content the prefixed markup occurs).
+ | Error_Namespace_empty NCName
+ -- ^ Namespace constraint: No Prefix Undeclaring
+ --
+ -- In a namespace declaration for a prefix (i.e., where the NSAttName is a PrefixedAttName), the attribute value MUST NOT be empty.
+ | Error_Namespace_reserved Namespace
+ | Error_Namespace_reserved_prefix NCName
+ -- ^ Namespace constraint: Reserved Prefixes and Namespace Names
+ --
+ -- The prefix xml is by definition bound to the namespace name
+ -- http://www.w3.org/XML/1998/namespace. It MAY, but need not, be
+ -- declared, and MUST NOT be bound to any other namespace name. Other
+ -- prefixes MUST NOT be bound to this namespace name, and it MUST NOT be
+ -- declared as the default namespace.
+ --
+ -- The prefix xmlns is used only to declare namespace bindings and is by
+ -- definition bound to the namespace name http://www.w3.org/2000/xmlns/.
+ -- It MUST NOT be declared . Other prefixes MUST NOT be bound to this
+ -- namespace name, and it MUST NOT be declared as the default namespace.
+ -- Element names MUST NOT have the prefix xmlns.
+ --
+ -- All other prefixes beginning with the three-letter sequence x, m, l, in
+ -- any case combination, are reserved. This means that:
+ --
+ -- - users SHOULD NOT use them except as defined by later specifications
+ -- - processors MUST NOT treat them as fatal errors.
+ deriving (Eq,Ord,Show)
+instance P.ShowErrorComponent Error where
+ showErrorComponent = show
+
+-- * Helpers
+p_error :: e -> ReadTree e s a
+p_error = P.fancyFailure . Set.singleton . P.ErrorCustom
+
+p_quoted :: P.Tokens s ~ TL.Text => (Char -> ReadTree e s a) -> ReadTree e s a
+p_quoted p =
+ P.between (P.char '"') (P.char '"') (p '"') <|>
+ P.between (P.char '\'') (P.char '\'') (p '\'')
+
+p_until ::
+ P.Tokens s ~ TL.Text =>
+ (Char -> Bool) -> (Char, TL.Text) -> ReadTree e s TL.Text
+p_until content (end, end_) =
+ (TL.concat <$>) $ P.many $
+ P.takeWhile1P Nothing (\c -> content c && c /= end) <|>
+ P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_))
+
+p_until1 ::
+ P.Tokens s ~ TL.Text =>
+ (Char -> Bool) -> (Char, TL.Text) -> ReadTree e s TL.Text
+p_until1 content (end, end_) =
+ (TL.concat <$>) $ P.some $
+ P.takeWhile1P Nothing (\c -> content c && c /= end) <|>
+ P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_))
+
+-- * Document
+p_document :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTrees
+p_document = do
+ ps <- p_prolog
+ e <- p_Element
+ m <- p_Miscs
+ P.eof
+ return (ps <> pure e <> m)
+
+-- ** Prolog
+p_prolog :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTrees
+p_prolog = (<>)
+ <$> P.option Seq.empty (pure <$> p_XMLDecl)
+ <*> p_Miscs
+
+-- ** Misc
+p_Miscs :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTrees
+p_Miscs = (Seq.fromList . catMaybes <$>) $ P.many $
+ Just <$> p_Comment <|>
+ Just <$> p_PI <|>
+ Nothing <$ p_Spaces1
+
+-- ** XMLDecl
+p_XMLDecl :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_XMLDecl = do
+ Sourced src as <- p_Sourced $ P.between (P.string "<?xml") (P.string "?>") $ do
+ vi <- pure <$> p_VersionInfo
+ ed <- P.option Seq.empty $ pure <$> p_EncodingDecl
+ sd <- P.option Seq.empty $ pure <$> p_SDDecl
+ p_Spaces
+ return $ vi <> ed <> sd
+ return $ TS.Tree (Sourced src $ NodePI "xml" "") as
+
+p_VersionInfo :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_VersionInfo = do
+ Sourced src v <- p_Sourced $ do
+ P.try $ p_Spaces1 <* P.string "version"
+ p_Eq
+ p_quoted $ const $
+ (<>)
+ <$> P.string "1."
+ <*> P.takeWhile1P Nothing Char.isDigit
+ return $ TS.tree0 $ Sourced src $ NodePI "version" v
+
+p_EncodingDecl :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_EncodingDecl = do
+ Sourced src v <- p_Sourced $ do
+ P.try $ p_Spaces1 <* P.string "encoding"
+ p_Eq
+ p_quoted $ const p_EncName
+ return $ TS.tree0 $ Sourced src $ NodePI "encoding" v
+
+p_EncName :: P.Tokens s ~ TL.Text => ReadTree Error s TL.Text
+p_EncName = P.label "EncName" $ do
+ P.notFollowedBy (P.satisfy $ not . isAlpha)
+ P.takeWhile1P Nothing $ \c ->
+ isAlpha c || Char.isDigit c ||
+ c=='.' || c=='_' || c=='-'
+ where isAlpha c = Char.isAsciiLower c || Char.isAsciiUpper c
+
+-- *** SDDecl
+p_SDDecl :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_SDDecl = do
+ Sourced src v <- p_Sourced $ do
+ P.try $ p_Spaces1 <* P.string "standalone"
+ p_Eq
+ p_quoted $ const $ P.string "yes" <|> P.string "no"
+ return $ TS.tree0 $ Sourced src $ NodePI "standalone" v
+
+-- ** CharData
+p_CharData :: P.Tokens s ~ TL.Text => ReadTree e s EscapedText
+p_CharData = P.label "[^<&]" $ escapeText <$>
+ p_until1 (\c -> XC.isXmlChar c && c/='<' && c/='&') (']',"]>")
+
+-- ** Comment
+p_Comment :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_Comment = p_SourcedBegin $ P.string "<!--" *> p_Comment__
+p_Comment_ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_Comment_ = P.string "--" *> p_Comment__
+p_Comment__:: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_Comment__ = do
+ c <- p_until XC.isXmlChar ('-', "-")
+ void $ P.string "-->"
+ src <- p_SourcedEnd
+ return $ TS.tree0 $ src $ NodeComment c
+
+-- ** CDATA
+p_CDSect :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_CDSect = p_SourcedBegin $ P.string "<![CDATA[" *> p_CDSect__
+p_CDSect_ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_CDSect_ = P.string "[CDATA[" *> p_CDSect__
+p_CDSect__ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_CDSect__ = do
+ c <- p_until XC.isXmlChar (']', "]>")
+ void $ P.string "]]>"
+ src <- p_SourcedEnd
+ return $ TS.tree0 $ src $ NodeCDATA c
+
+-- ** PI
+p_PI :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_PI = p_SourcedBegin $ P.string "<?" *> p_PI__
+p_PI_ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_PI_ = P.char '?' *> p_PI__
+p_PI__ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_PI__ = do
+ n <- p_PITarget
+ v <- P.option "" $ p_Spaces1 *> p_until XC.isXmlChar ('?', ">")
+ void $ P.string "?>"
+ src <- p_SourcedEnd
+ return $ TS.tree0 $ src $ NodePI n v
+p_PITarget :: P.Tokens s ~ TL.Text => ReadTree Error s PName
+p_PITarget = do
+ n <- p_PName
+ case n of
+ PName{pNameSpace=Nothing, pNameLocal=NCName l}
+ | "xml" == TL.toLower l -> p_error $ Error_PI_reserved n
+ _ -> return n
+
+-- ** Element
+p_Element :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_Element = p_SourcedBegin $ (P.char '<' *> p_Element_)
+p_Element_ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_Element_ = p_STag
+
+-- *** STag
+p_STag :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
+p_STag = do
+ n <- p_PName
+ attrs <- P.many $ p_Attribute
+ p_Spaces
+ ro <- R.ask
+ elemNS :: HM.HashMap NCName Namespace <-
+ (HM.fromList . List.concat <$>) $ forM attrs $ \case
+ (PName{..}, Sourced _ av)
+ | ns <- Namespace $ unescapeAttr av
+ , Nothing <- pNameSpace
+ , NCName "xmlns" <- pNameLocal ->
+ -- Default namespace declaration
+ case ns of
+ _ | ns == xmlns_xml -- DOC: it MUST NOT be declared as the default namespace
+ || ns == xmlns_xmlns -- DOC: it MUST NOT be declared as the default namespace
+ -> p_error $ Error_Namespace_reserved ns
+ _ -> return [(NCName "" , ns)]
+ | ns <- Namespace $ unescapeAttr av
+ , Just (NCName "xmlns") <- pNameSpace ->
+ -- Namespace prefix declaration
+ case unNCName pNameLocal of
+ "xml" -- DOC: It MAY, but need not, be declared,
+ -- and MUST NOT be bound to any other namespace name.
+ | ns == xmlns_xml -> return []
+ | otherwise -> p_error $ Error_Namespace_reserved_prefix pNameLocal
+ "xmlns" -- DOC: It MUST NOT be declared
+ -> p_error $ Error_Namespace_reserved_prefix pNameLocal
+ local | "xml" <- TL.toLower $ TL.take 3 local -> return []
+ -- DOC: All other prefixes beginning with the three-letter
+ -- sequence x, m, l, in any case combination, are reserved.
+ -- This means that: processors MUST NOT treat them as fatal errors.
+ _ | ns == xmlns_xml -- DOC: Other prefixes MUST NOT be bound to this namespace name.
+ || ns == xmlns_xmlns -- DOC: Other prefixes MUST NOT be bound to this namespace name.
+ -> p_error $ Error_Namespace_reserved ns
+ _ -> return [(pNameLocal, ns)]
+ | otherwise -> return []
+ let scopeNS = elemNS <> readTreeInh_ns_scope ro
+ let defaultNS = HM.lookupDefault (readTreeInh_ns_default ro) (NCName "") scopeNS
+ let
+ lookupNamePrefix prefix =
+ maybe (p_error $ Error_Namespace_prefix_unknown prefix) return $
+ HM.lookup prefix scopeNS
+ elemName :: QName <-
+ -- Expand element's QName
+ case pNameSpace n of
+ Nothing -> return QName{qNameSpace=defaultNS, qNameLocal=pNameLocal n}
+ -- DOC: If there is a default namespace declaration in scope,
+ -- the expanded name corresponding to an unprefixed element name
+ -- has the URI of the default namespace as its namespace name.
+ Just prefix
+ | NCName "xmlns" <- prefix ->
+ -- DOC: Element names MUST NOT have the prefix xmlns.
+ p_error $ Error_Namespace_reserved_prefix prefix
+ | otherwise -> do
+ ns <- lookupNamePrefix prefix
+ return QName{qNameSpace=ns, qNameLocal=pNameLocal n}
+ elemAttrs :: [(QName, FileSourced EscapedAttr)] <-
+ -- Expand attributes' PName into QName
+ forM attrs $ \(an, av) -> do
+ ns <- maybe (return "") lookupNamePrefix $ pNameSpace an
+ let qn = QName{qNameSpace=ns, qNameLocal=pNameLocal an}
+ return (qn, av)
+ -- Check for attribute collision
+ let
+ attrsByQName :: HM.HashMap QName [FileSourced EscapedAttr] =
+ HM.fromListWith (<>) $ (<$> elemAttrs) $
+ \(an, av) -> (an, [av])
+ case HM.toList $ HM.filter (\x -> length x > 1) attrsByQName of
+ (an, _):_ -> p_error $ Error_Attribute_collision an
+ _ -> return ()
+ content :: FileSourcedTrees <-
+ mempty <$ P.string "/>" <|>
+ R.local
+ (const ro
+ { readTreeInh_ns_scope = scopeNS
+ , readTreeInh_ns_default = defaultNS
+ })
+ (P.char '>' *> p_content <* p_ETag elemName)
+ src <- p_SourcedEnd
+ return $ TS.Tree (src $ NodeElem elemName (List.head <$> attrsByQName)) content
+
+-- *** Attribute
+-- | Note: despite the type, the returned 'FileSource'
+-- encompasses also the attribute 'PName'.
+-- It is pushed in the attribute value to fit the insertion
+-- of the attribute into a 'HM.HashMap'.
+p_Attribute :: P.Tokens s ~ TL.Text => ReadTree Error s (PName, FileSourced EscapedAttr)
+p_Attribute =
+ p_SourcedBegin $ do
+ an <- P.try $ p_Spaces1 *> p_PName
+ void p_Eq
+ av <- p_AttrValue
+ src <- p_SourcedEnd
+ return (an, src av)
+
+p_AttrValue :: P.Tokens s ~ TL.Text => ReadTree Error s EscapedAttr
+p_AttrValue = p_quoted p_AttrValueText
+
+p_AttrValueText :: P.Tokens s ~ TL.Text => Char -> ReadTree Error s EscapedAttr
+p_AttrValueText q =
+ EscapedAttr . Seq.fromList <$> P.many (
+ p_Reference <|>
+ -- Supplementary alternative to always escape the quote
+ -- as expected by 'EscapedAttr'.
+ (if q /= '\"' then EscapedEntityRef entityRef_quot <$ P.char '"' else P.empty) <|>
+ EscapedPlain <$> P.label ("[^<&"<>[q]<>"]")
+ (P.takeWhile1P Nothing $ \c ->
+ XC.isXmlChar c &&
+ c `List.notElem` (q:"<&")
+ )
+ )
+
+-- * content
+p_content :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTrees
+p_content =
+ (Seq.fromList <$>) $ P.many $
+ (p_SourcedBegin $ do
+ P.try $ P.char '<' *> P.notFollowedBy (P.char '/')
+ p_Element_ <|> p_PI_ <|> (P.char '!' *> (p_Comment_ <|> p_CDSect_))
+ )
+ <|> (
+ (TS.tree0 <$>) $
+ p_Sourced $ NodeText . EscapedText . foldMap unEscapedText
+ <$> P.some (
+ p_CharData <|>
+ EscapedText . pure <$> p_Reference
+ )
+ )
+
+-- *** ETag
+p_ETag :: P.Tokens s ~ TL.Text => QName -> ReadTree Error s ()
+p_ETag expected = do
+ got <- P.string "</" *> p_QName <* p_Spaces <* P.char '>'
+ unless (got == expected) $
+ p_error $ Error_Closing_tag_unexpected got expected
+
+-- * PName
+p_PName :: P.Tokens s ~ TL.Text => ReadTree e s PName
+p_PName = do
+ n <- p_NCName
+ s <- P.optional $ P.try $ P.char ':' *> p_NCName
+ return $ case s of
+ Nothing -> PName{pNameSpace=Nothing, pNameLocal=n}
+ Just l -> PName{pNameSpace=Just n , pNameLocal=l}
+
+-- * QName
+p_QName :: P.Tokens s ~ TL.Text => ReadTree Error s QName
+p_QName = do
+ n <- p_NCName
+ s <- P.optional $ P.try $ P.char ':' *> p_NCName
+ ReadTreeInh{..} <- R.ask
+ case s of
+ Nothing -> return QName{qNameSpace=readTreeInh_ns_default, qNameLocal=n}
+ Just l ->
+ case HM.lookup n readTreeInh_ns_scope of
+ Nothing -> p_error $ Error_Namespace_prefix_unknown n
+ Just ns -> return QName{qNameSpace=ns, qNameLocal=l}
+
+-- ** NCName
+p_NCName :: P.Tokens s ~ TL.Text => ReadTree e s NCName
+p_NCName = P.label "NCName" $ NCName
+ <$ P.notFollowedBy (P.satisfy (not . XC.isXmlNCNameStartChar))
+ <*> P.takeWhile1P Nothing XC.isXmlNCNameChar
+
+-- * Reference
+p_Reference :: P.Tokens s ~ TL.Text => ReadTree Error s Escaped
+p_Reference =
+ EscapedCharRef <$> p_CharRef <|>
+ EscapedEntityRef <$> p_EntityRef
+
+-- ** EntityRef
+p_EntityRef :: P.Tokens s ~ TL.Text => ReadTree Error s EntityRef
+p_EntityRef = do
+ ref <- P.char '&' *> p_NCName <* P.char ';'
+ EntityRef ref <$> lookupEntityRef ref
+ where
+ -- Because entities are declared in the (unimplemented) DTD,
+ -- only builtins entities are supported for now.
+ lookupEntityRef (NCName "lt" ) = pure "<"
+ lookupEntityRef (NCName "gt" ) = pure ">"
+ lookupEntityRef (NCName "amp" ) = pure "&"
+ lookupEntityRef (NCName "apos") = pure "'"
+ lookupEntityRef (NCName "quot") = pure "\""
+ lookupEntityRef n = p_error $ Error_EntityRef_unknown n
+
+-- ** CharRef
+p_CharRef :: P.Tokens s ~ TL.Text => ReadTree Error s CharRef
+p_CharRef =
+ do
+ ref <- readHexadecimal
+ <$ P.string "&#x"
+ <*> P.some P.hexDigitChar
+ <* P.char ';'
+ check ref
+ <|> do
+ ref <- readDecimal
+ <$ P.string "&#"
+ <*> P.some P.digitChar
+ <* P.char ';'
+ check ref
+ where
+ check i =
+ let c = toEnum (fromInteger i) in
+ if i <= toInteger (fromEnum (maxBound::Char))
+ && XC.isXmlChar c
+ then pure $ CharRef c
+ else p_error $ Error_CharRef_invalid i
+
+readInt :: Integer -> String -> Integer
+readInt base digits =
+ sign * List.foldl' acc 0 (List.concatMap digToInt digits1)
+ where
+ acc q r = q*base + r
+ (sign, digits1) =
+ case digits of
+ [] -> (1, digits)
+ c:ds | c == '-' -> (-1, ds)
+ | c == '+' -> ( 1, ds)
+ | otherwise -> ( 1, digits)
+ ord = toInteger . Char.ord
+ digToInt c
+ | Char.isDigit c = [ord c - ord '0']
+ | Char.isAsciiLower c = [ord c - ord 'a' + 10]
+ | Char.isAsciiUpper c = [ord c - ord 'A' + 10]
+ | otherwise = []
+
+readDecimal :: String -> Integer
+readDecimal = readInt 10
+
+readHexadecimal :: String -> Integer
+readHexadecimal = readInt 16
+
+-- * Char
+p_Char :: P.Tokens s ~ TL.Text => ReadTree e s Char
+p_Char = P.label "XmlChar" $ P.satisfy XC.isXmlCharCR <|> p_CRLF
+{-# INLINE p_Char #-}
+
+-- ** Space
+-- | Map '\r' and '\r\n' to '\n'.
+-- See: https://www.w3.org/TR/xml/#sec-line-ends
+p_CRLF :: P.Tokens s ~ TL.Text => ReadTree e s Char
+p_CRLF = P.char '\r' *> P.option '\n' (P.char '\n')
+
+p_Space :: P.Tokens s ~ TL.Text => ReadTree e s Char
+p_Space = P.label "space" $ P.satisfy XC.isXmlSpaceCharCR <|> p_CRLF
+{-# INLINE p_Space #-}
+
+p_Spaces :: P.Tokens s ~ TL.Text => ReadTree e s ()
+p_Spaces = P.label "spaces" $ void $ P.takeWhileP Nothing XC.isXmlSpaceChar
+{-# INLINE p_Spaces #-}
+
+p_Spaces1 :: P.Tokens s ~ TL.Text => ReadTree e s ()
+p_Spaces1 = P.label "spaces" $ void $ P.takeWhile1P Nothing XC.isXmlSpaceChar
+{-# INLINE p_Spaces1 #-}
+
+-- * Eq
+p_separator :: P.Tokens s ~ TL.Text => Char -> ReadTree e s ()
+p_separator c = P.try (() <$ p_Spaces <* P.char c) <* p_Spaces
+
+p_Eq :: P.Tokens s ~ TL.Text => ReadTree e s ()
+p_Eq = p_separator '='