{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Rank2Types #-} {-# LANGUAGE StrictData #-} {-# LANGUAGE TypeFamilies #-} module Language.Symantic.XML.Read.Parser where import Control.Applicative (Applicative(..), Alternative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Function (($), (.)) import Data.Functor ((<$>)) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..)) import Data.Ord (Ord(..)) import Data.String (IsString) import Prelude (Integer) import Text.Show (Show(..)) import qualified Control.Monad.Trans.Reader as R import qualified Data.HashMap.Strict as HM import qualified Data.Set as Set import qualified Data.Text.Lazy as TL import qualified Text.Megaparsec as P import qualified Text.Megaparsec.Char as P import Language.Symantic.XML.Document -- * Type 'Parser' -- | Convenient alias. type Parser e s a = Parsable e s a => R.ReaderT Reader (P.Parsec e s) a -- ** Type 'Parsable' type Parsable e s a = ( P.Stream s , P.Token s ~ Char , Ord e , IsString (P.Tokens s) , P.ShowErrorComponent e ) -- ** Type 'Reader' data Reader = Reader { reader_source :: FileSource Offset , reader_ns_scope :: HM.HashMap NCName Namespace , reader_ns_default :: Namespace } deriving (Show) instance Default Reader where def = Reader { reader_source = pure def , reader_ns_scope = HM.fromList [ ("xml" , xmlns_xml) , ("xmlns", xmlns_xmlns) ] , reader_ns_default = "" } p_Offset :: Parser e s Offset p_Offset = Offset <$> P.getOffset {-# INLINE p_Offset #-} p_Sourced :: Parser e s a -> Parser e s (Sourced (FileSource Offset) a) p_Sourced pa = do Reader{reader_source} <- R.ask b <- P.getParserState let fileRange_file = P.sourceName $ P.pstateSourcePos $ P.statePosState b let fileRange_begin = Offset $ P.stateOffset b a <- pa e <- P.getParserState let fileRange_end = Offset $ P.stateOffset e return $ Sourced (setSource FileRange{..} reader_source) a setSource :: FileRange pos -> FileSource pos -> FileSource pos setSource fileRange (_curr:|next) = fileRange :| next -- | Like 'p_Sourced' but uncoupled (through the use of 'p_SourcedEnd') for more flexibility. p_SourcedBegin :: Parser e s a -> Parser e s a p_SourcedBegin pa = do b <- P.getParserState let fileRange_file = P.sourceName $ P.pstateSourcePos $ P.statePosState b let fileRange_begin = Offset $ P.stateOffset b let fileRange_end = fileRange_begin (`R.local` pa) $ \ro@Reader{..} -> ro{ reader_source = setSource FileRange{..} reader_source } -- | WARNING: Only to be used within a 'p_SourcedBegin'. p_SourcedEnd :: Parser e s (a -> Sourced (FileSource Offset) a) p_SourcedEnd = do Reader{..} <- R.ask e <- P.getParserState let fileRange_end = Offset $ P.stateOffset e return $ Sourced $ (\(curr:|path) -> curr{fileRange_end}:|path) reader_source {- -- ** Type 'StreamSourced' -- | Wrap 'TL.Text' to have a 'P.Stream' instance -- whose 'P.advance1' method abuses the tab width state -- to instead pass the line indent. -- This in order to report correct 'P.SourcePos' -- when parsing a 'Sourced' containing newlines. newtype StreamSourced = StreamSourced { unStreamSourced :: TL.Text } deriving (IsString,Eq,Ord) instance P.Stream StreamSourced where type Token StreamSourced = Char type Tokens StreamSourced = TL.Text take1_ (StreamSourced t) = (StreamSourced <$>) <$> P.take1_ t takeN_ n (StreamSourced t) = (StreamSourced <$>) <$> P.takeN_ n t takeWhile_ f (StreamSourced t) = StreamSourced <$> P.takeWhile_ f t tokensToChunk _s = P.tokensToChunk (Proxy::Proxy TL.Text) chunkToTokens _s = P.chunkToTokens (Proxy::Proxy TL.Text) chunkLength _s = P.chunkLength (Proxy::Proxy TL.Text) {- advance1 _s indent (P.SourcePos n line col) c = case c of '\n' -> P.SourcePos n (line <> P.pos1) indent _ -> P.SourcePos n line (col <> P.pos1) advanceN s indent = TL.foldl' (P.advance1 s indent) -} -- | Wrapper around |P.runParser'| -- to use given 'Sourced' as starting position. runParserOnSourced :: Parsable e StreamSourced a => Parser e StreamSourced a -> Sourced FileSource TL.Text -> Either (P.ParseError (P.Token StreamSourced) e) a runParserOnSourced p (Sourced (FileRange inp bp _ep :| path) s) = snd $ P.runParser' (R.runReaderT p ro <* P.eof) P.State { P.stateInput = StreamSourced s , P.statePos = pure $ P.SourcePos inp (P.mkPos $ filePos_line bp) indent , P.stateTabWidth = indent , P.stateTokensProcessed = 0 } where indent = P.mkPos $ filePos_column bp ro = def{ reader_source = fromMaybe (pure def) $ nonEmpty path } -} -- * Type 'Error' data Error = Error_CharRef_invalid Integer -- ^ Well-formedness constraint: Legal Character. -- -- Characters referred to using character references MUST match the production for Char. | Error_EntityRef_unknown NCName -- ^ Well-formedness constraint: Entity Declared -- -- In a document without any DTD, a document with only an internal DTD -- subset which contains no parameter entity references, or a document -- with " standalone='yes' ", for an entity reference that does not occur -- within the external subset or a parameter entity, the Name given in the -- entity reference MUST match that in an entity declaration that does not -- occur within the external subset or a parameter entity, except that -- well-formed documents need not declare any of the following entities: -- amp, lt, gt, apos, quot. The declaration of a general entity MUST -- precede any reference to it which appears in a default value in an -- attribute-list declaration. -- -- Note that non-validating processors are not obligated to read and -- process entity declarations occurring in parameter entities or in the -- external subset; for such documents, the rule that an entity must be -- declared is a well-formedness constraint only if standalone='yes'. | Error_Closing_tag_unexpected QName QName -- ^ Well-formedness constraint: Element Type Match. -- -- The Name in an element's end-tag MUST match the element type in the start-tag. | Error_Attribute_collision QName -- ^ Well-formedness constraint: Unique Att Spec. -- -- An attribute name MUST NOT appear more than once in the same start-tag or empty-element tag. | Error_PI_reserved PName -- ^ The target names " XML ", " xml ", and so on are reserved for standardization. | Error_Namespace_prefix_unknown NCName -- ^ Namespace constraint: Prefix Declared -- -- The namespace prefix, unless it is xml or xmlns, MUST have been declared in a namespace declaration attribute in either the start-tag of the element where the prefix is used or in an ancestor element (i.e., an element in whose content the prefixed markup occurs). | Error_Namespace_empty NCName -- ^ Namespace constraint: No Prefix Undeclaring -- -- In a namespace declaration for a prefix (i.e., where the NSAttName is a PrefixedAttName), the attribute value MUST NOT be empty. | Error_Namespace_reserved Namespace | Error_Namespace_reserved_prefix NCName -- ^ Namespace constraint: Reserved Prefixes and Namespace Names -- -- The prefix xml is by definition bound to the namespace name -- http://www.w3.org/XML/1998/namespace. It MAY, but need not, be -- declared, and MUST NOT be bound to any other namespace name. Other -- prefixes MUST NOT be bound to this namespace name, and it MUST NOT be -- declared as the default namespace. -- -- The prefix xmlns is used only to declare namespace bindings and is by -- definition bound to the namespace name http://www.w3.org/2000/xmlns/. -- It MUST NOT be declared . Other prefixes MUST NOT be bound to this -- namespace name, and it MUST NOT be declared as the default namespace. -- Element names MUST NOT have the prefix xmlns. -- -- All other prefixes beginning with the three-letter sequence x, m, l, in -- any case combination, are reserved. This means that: -- -- - users SHOULD NOT use them except as defined by later specifications -- - processors MUST NOT treat them as fatal errors. deriving (Eq,Ord,Show) instance P.ShowErrorComponent Error where showErrorComponent = show -- * Helpers p_error :: e -> Parser e s a p_error = P.fancyFailure . Set.singleton . P.ErrorCustom p_quoted :: P.Tokens s ~ TL.Text => (Char -> Parser e s a) -> Parser e s a p_quoted p = P.between (P.char '"') (P.char '"') (p '"') <|> P.between (P.char '\'') (P.char '\'') (p '\'') p_until :: P.Tokens s ~ TL.Text => (Char -> Bool) -> (Char, TL.Text) -> Parser e s TL.Text p_until content (end, end_) = (TL.concat <$>) $ P.many $ P.takeWhile1P Nothing (\c -> content c && c /= end) <|> P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_)) p_until1 :: P.Tokens s ~ TL.Text => (Char -> Bool) -> (Char, TL.Text) -> Parser e s TL.Text p_until1 content (end, end_) = (TL.concat <$>) $ P.some $ P.takeWhile1P Nothing (\c -> content c && c /= end) <|> P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_))