{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Rank2Types #-} {-# LANGUAGE TypeFamilies #-} module Language.Symantic.XML.Read.Parser where import Control.Applicative (Applicative(..), Alternative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Default.Class (Default(..)) import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Function (($), (.)) import Data.Functor (Functor, (<$>)) import Data.Functor.Identity (Identity(..)) import Data.List.NonEmpty (NonEmpty(..), nonEmpty) import Data.Maybe (Maybe(..), fromMaybe) import Data.Ord (Ord(..)) import Data.Proxy (Proxy(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq) import Data.String (IsString) import Data.Tuple (snd) import Prelude (Int, Integer, Num(..), fromIntegral) import System.FilePath (FilePath) import Text.Show (Show(..), showChar, showString, showParen) import qualified Control.Monad.Trans.Reader as R import qualified Data.HashMap.Strict as HM import qualified Data.Set as Set import qualified Data.Text as Text import qualified Data.Text.Lazy as TL import qualified Data.TreeSeq.Strict as TS import qualified Text.Megaparsec as P import qualified Text.Megaparsec.Char as P import Language.Symantic.XML.Document -- * Type 'XML' type XML = TS.Tree (Sourced FileSource Node) type XMLs = Seq XML -- * Type 'Parser' -- | Convenient alias. type Parser e s a = Parsable e s a => R.ReaderT Reader (P.ParsecT e s Identity) a -- ** Type 'Parsable' type Parsable e s a = ( P.Stream s , P.Token s ~ Char , Ord e , IsString (P.Tokens s) , P.ShowErrorComponent e ) -- ** Type 'Reader' data Reader = Reader { reader_source :: !FileSource , reader_ns_scope :: !(HM.HashMap NCName Namespace) , reader_ns_default :: !Namespace } deriving (Show) instance Default Reader where def = Reader { reader_source = pure def , reader_ns_scope = HM.fromList [ ("xml" , xmlns_xml) , ("xmlns", xmlns_xmlns) ] , reader_ns_default = "" } -- * Type 'Sourced' data Sourced src a = Sourced { source :: !src , unSourced :: !a } deriving (Eq, Ord, Functor) instance (Show src, Show a) => Show (Sourced src a) where showsPrec p Sourced{..} = showParen (p > 10) $ showsPrec 11 unSourced . showString " @" . showsPrec 10 source instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y = Sourced (FileRange fx bx ey :| lx) $ x<>fromPad (FilePos lines columns)<>y where lines = filePos_line by - filePos_line ex columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx) p_Sourced :: Parser e s a -> Parser e s (Sourced FileSource a) p_Sourced pa = do Reader{reader_source} <- R.ask beginPos :| _ <- P.statePos <$> P.getParserState a <- pa fileRange_end <- p_FilePos let fileRange = FileRange { fileRange_file = P.sourceName beginPos , fileRange_begin = FilePos (P.unPos $ P.sourceLine beginPos) (P.unPos $ P.sourceColumn beginPos) , fileRange_end } return $ Sourced (setSource fileRange reader_source) a setSource :: FileRange -> FileSource -> FileSource setSource fileRange (_curr:|next) = fileRange :| next -- | Like 'p_Sourced' but uncoupled for more flexibility. p_SourcedBegin :: Parser e s a -> Parser e s a p_SourcedBegin pa = do currPos :| _ <- P.statePos <$> P.getParserState let fileRange_begin = FilePos (P.unPos $ P.sourceLine currPos) (P.unPos $ P.sourceColumn currPos) let fileRange = FileRange { fileRange_file = P.sourceName currPos , fileRange_begin , fileRange_end = fileRange_begin } (`R.local` pa) $ \ro@Reader{..} -> ro{ reader_source = setSource fileRange reader_source } -- | Only to be used within a 'p_SourcedBegin'. p_SourcedEnd :: Parser e s (a -> Sourced FileSource a) p_SourcedEnd = do fileRange_end <- p_FilePos Reader{..} <- R.ask return $ Sourced $ (\(curr:|path) -> curr{fileRange_end}:|path) reader_source -- | Wrapper around |P.runParser'| -- to use given 'Sourced' as starting position. runParserOnSourced :: Parsable e StreamSourced a => Parser e StreamSourced a -> Sourced FileSource TL.Text -> Either (P.ParseError (P.Token StreamSourced) e) a runParserOnSourced p (Sourced (FileRange inp bp _ep :| path) s) = snd $ P.runParser' (R.runReaderT p ro <* P.eof) P.State { P.stateInput = StreamSourced s , P.statePos = pure $ P.SourcePos inp (P.mkPos $ filePos_line bp) indent , P.stateTabWidth = indent , P.stateTokensProcessed = 0 } where indent = P.mkPos $ filePos_column bp ro = def{ reader_source = fromMaybe (pure def) $ nonEmpty path } -- ** Class 'NoSource' class NoSource src where noSource :: src instance NoSource FileSource where noSource = noSource :| [] instance NoSource FileRange where noSource = FileRange "" filePos1 filePos1 {- instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where mempty = sourced0 mempty mappend = (<>) -} notSourced :: NoSource src => a -> Sourced src a notSourced = Sourced noSource -- ** Type 'StreamSourced' -- | Wrap 'TL.Text' to have a 'P.Stream' instance -- whose 'P.advance1' method abuses the tab width state -- to instead pass the line indent. -- This in order to report correct 'P.SourcePos' -- when parsing a 'Sourced' containing newlines. newtype StreamSourced = StreamSourced { unStreamSourced :: TL.Text } deriving (IsString,Eq,Ord) instance P.Stream StreamSourced where type Token StreamSourced = Char type Tokens StreamSourced = TL.Text take1_ (StreamSourced t) = (StreamSourced <$>) <$> P.take1_ t takeN_ n (StreamSourced t) = (StreamSourced <$>) <$> P.takeN_ n t takeWhile_ f (StreamSourced t) = StreamSourced <$> P.takeWhile_ f t tokensToChunk _s = P.tokensToChunk (Proxy::Proxy TL.Text) chunkToTokens _s = P.chunkToTokens (Proxy::Proxy TL.Text) chunkLength _s = P.chunkLength (Proxy::Proxy TL.Text) advance1 _s indent (P.SourcePos n line col) c = case c of '\n' -> P.SourcePos n (line <> P.pos1) indent _ -> P.SourcePos n line (col <> P.pos1) advanceN s indent = TL.foldl' (P.advance1 s indent) -- * Type 'FileSource' type FileSource = NonEmpty FileRange -- ** Type 'FileRange' data FileRange = FileRange { fileRange_file :: !FilePath , fileRange_begin :: !FilePos , fileRange_end :: !FilePos } deriving (Eq, Ord) instance Default FileRange where def = FileRange "" filePos1 filePos1 instance Show FileRange where showsPrec _p FileRange{..} = showString fileRange_file . showChar '#' . showsPrec 10 fileRange_begin . showChar '-' . showsPrec 10 fileRange_end -- *** Type 'FilePos' -- | Absolute text file position. data FilePos = FilePos { filePos_line :: {-# UNPACK #-} !LineNum , filePos_column :: {-# UNPACK #-} !ColNum } deriving (Eq, Ord) instance Default FilePos where def = filePos1 instance Show FilePos where showsPrec _p FilePos{..} = showsPrec 11 filePos_line . showChar ':' . showsPrec 11 filePos_column filePos1 :: FilePos filePos1 = FilePos 1 1 p_FilePos :: Parser e s FilePos p_FilePos = do pos :| _ <- P.statePos <$> P.getParserState return $ FilePos (P.unPos $ P.sourceLine pos) (P.unPos $ P.sourceColumn pos) -- **** Type 'LineNum' type LineNum = Int -- **** Type 'ColNum' type ColNum = Int -- **** Class 'FromPad' class FromPad a where fromPad :: FilePos -> a instance FromPad Text.Text where fromPad FilePos{..} = Text.replicate filePos_line "\n" <> Text.replicate filePos_column " " instance FromPad TL.Text where fromPad FilePos{..} = TL.replicate (fromIntegral filePos_line) "\n" <> TL.replicate (fromIntegral filePos_column) " " -- * Type 'Error' data Error = Error_CharRef_invalid Integer -- ^ Well-formedness constraint: Legal Character. -- -- Characters referred to using character references MUST match the production for Char. | Error_EntityRef_unknown Name -- ^ Well-formedness constraint: Entity Declared -- -- In a document without any DTD, a document with only an internal DTD -- subset which contains no parameter entity references, or a document -- with " standalone='yes' ", for an entity reference that does not occur -- within the external subset or a parameter entity, the Name given in the -- entity reference MUST match that in an entity declaration that does not -- occur within the external subset or a parameter entity, except that -- well-formed documents need not declare any of the following entities: -- amp, lt, gt, apos, quot. The declaration of a general entity MUST -- precede any reference to it which appears in a default value in an -- attribute-list declaration. -- -- Note that non-validating processors are not obligated to read and -- process entity declarations occurring in parameter entities or in the -- external subset; for such documents, the rule that an entity must be -- declared is a well-formedness constraint only if standalone='yes'. | Error_Closing_tag_unexpected QName QName -- ^ Well-formedness constraint: Element Type Match. -- -- The Name in an element's end-tag MUST match the element type in the start-tag. | Error_Attribute_collision QName -- ^ Well-formedness constraint: Unique Att Spec. -- -- An attribute name MUST NOT appear more than once in the same start-tag or empty-element tag. | Error_PI_reserved PName -- ^ The target names " XML ", " xml ", and so on are reserved for standardization. | Error_Namespace_prefix_unknown NCName -- ^ Namespace constraint: Prefix Declared -- -- The namespace prefix, unless it is xml or xmlns, MUST have been declared in a namespace declaration attribute in either the start-tag of the element where the prefix is used or in an ancestor element (i.e., an element in whose content the prefixed markup occurs). | Error_Namespace_empty NCName -- ^ Namespace constraint: No Prefix Undeclaring -- -- In a namespace declaration for a prefix (i.e., where the NSAttName is a PrefixedAttName), the attribute value MUST NOT be empty. | Error_Namespace_reserved Namespace | Error_Namespace_reserved_prefix NCName -- ^ Namespace constraint: Reserved Prefixes and Namespace Names -- -- The prefix xml is by definition bound to the namespace name -- http://www.w3.org/XML/1998/namespace. It MAY, but need not, be -- declared, and MUST NOT be bound to any other namespace name. Other -- prefixes MUST NOT be bound to this namespace name, and it MUST NOT be -- declared as the default namespace. -- -- The prefix xmlns is used only to declare namespace bindings and is by -- definition bound to the namespace name http://www.w3.org/2000/xmlns/. -- It MUST NOT be declared . Other prefixes MUST NOT be bound to this -- namespace name, and it MUST NOT be declared as the default namespace. -- Element names MUST NOT have the prefix xmlns. -- -- All other prefixes beginning with the three-letter sequence x, m, l, in -- any case combination, are reserved. This means that: -- -- - users SHOULD NOT use them except as defined by later specifications -- - processors MUST NOT treat them as fatal errors. deriving (Eq,Ord,Show) instance P.ShowErrorComponent Error where showErrorComponent = show -- * Helpers p_error :: e -> Parser e s a p_error = P.fancyFailure . Set.singleton . P.ErrorCustom p_quoted :: P.Tokens s ~ TL.Text => (Char -> Parser e s a) -> Parser e s a p_quoted p = P.between (P.char '"') (P.char '"') (p '"') <|> P.between (P.char '\'') (P.char '\'') (p '\'') p_until :: P.Tokens s ~ TL.Text => (Char -> Bool) -> (Char, TL.Text) -> Parser e s TL.Text p_until content (end, end_) = (TL.concat <$>) $ P.many $ P.takeWhile1P Nothing (\c -> content c && c /= end) <|> P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_)) p_until1 :: P.Tokens s ~ TL.Text => (Char -> Bool) -> (Char, TL.Text) -> Parser e s TL.Text p_until1 content (end, end_) = (TL.concat <$>) $ P.some $ P.takeWhile1P Nothing (\c -> content c && c /= end) <|> P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_))