{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Language.Symantic.XML.Parser where import Control.Applicative (Applicative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Either (Either(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Functor ((<$>), (<$)) import Data.Functor.Identity (Identity(..)) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Proxy (Proxy(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (ViewL(..)) import Data.String (String) import Data.Tuple (snd) import Text.Show (Show(..)) import qualified Control.Monad.Trans.State as S import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Text.Megaparsec as P import Language.Symantic.XML.Document (XML, XMLs) import qualified Language.Symantic.XML.Document as XML -- | Whether the given 'XML.Node' must be ignored by the parser. isIgnoredNode :: XML.Node -> Bool isIgnoredNode = \case XML.NodeComment{} -> True XML.NodePI{} -> True XML.NodeCDATA{} -> True _ -> False instance P.Stream XMLs where type Token XMLs = XML type Tokens XMLs = XMLs take1_ s = case Seq.viewl s of EmptyL -> Nothing t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts | isIgnoredNode n -> P.take1_ ts | otherwise -> Just (t, ts) positionAt1 _s pos (XML.Tree (XML.Sourced (XML.FileRange{XML.fileRange_begin=XML.FilePos l c}:|_) _n) _ts) = pos{ P.sourceLine = P.mkPos l , P.sourceColumn = P.mkPos c } positionAtN s pos ts = case Seq.viewl ts of t :< _ -> P.positionAt1 s pos t EmptyL -> pos advance1 _s _indent pos (XML.Tree (XML.Sourced (XML.FileRange{XML.fileRange_end=XML.FilePos l c}:|_) _n) _ts) = -- WARNING: the end of a 'FileRange' is not necessarily -- the beginning of the next 'FileRange'. pos{ P.sourceLine = P.mkPos l , P.sourceColumn = P.mkPos c } advanceN s = foldl' . P.advance1 s takeN_ n s | n <= 0 = Just (mempty, s) | null s = Nothing | otherwise = let (ns,rs) = Seq.splitAt n s in let (ko,ok) = Seq.partition (isIgnoredNode . XML.unSourced . XML.unTree) ns in case P.takeN_ (Seq.length ko) rs of Nothing -> Just (ok, rs) Just (ns',rs') -> Just (ok<>ns', rs') tokensToChunk _s = Seq.fromList chunkToTokens _s = toList chunkLength _s = Seq.length takeWhile_ = Seq.spanl -- | Adjust the current 'P.SourcePos' -- to be the begining of the following-sibling 'XML' node -- so that on error 'P.toHints' keeps expected 'P.Token's in the list, -- and thus makes useful error messages. -- -- This is needed because the end of a 'FileRange' -- is not necessarily the begin of the next 'FileRange'. -- type Parser = S.StateT State (P.Parsec ErrorRead XMLs) setFilePosToNextNode :: P.MonadParsec e XMLs m => m () setFilePosToNextNode = do P.State { P.stateInput = inp , P.statePos = pos :| _ } <- P.getParserState case Seq.viewl inp of EmptyL -> return () t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XMLs) pos t -- | @runParserT st pos p xs@ runs a 'Parser' @p@ to parse @xs@, -- using state @st@ from position @pos@. runParserT :: Ord err => Monad m => P.ParsecT err XMLs m a -> NonEmpty P.SourcePos -> XMLs -> m (Either (P.ParseError (P.Token XMLs) err) a) runParserT p pos inp = snd <$> P.runParserT' p P.State { P.stateInput = inp , P.statePos = case Seq.viewl inp of EmptyL -> pos XML.Tree (XML.Sourced ss _) _ :< _ -> (<$> ss) $ \XML.FileRange{fileRange_begin=bp, fileRange_file} -> P.SourcePos fileRange_file (P.mkPos $ XML.filePos_line bp) (P.mkPos $ XML.filePos_column bp) , P.stateTabWidth = P.pos1 , P.stateTokensProcessed = 0 } -- | Like 'runParser', but using 'Identity' as the inner-monad of 'P.ParsecT'. runParser :: Ord err => P.Parsec err XMLs a -> NonEmpty P.SourcePos -> XMLs -> Either (P.ParseError (P.Token XMLs) err) a runParser p pos xs = runIdentity $ runParserT p pos xs -- | @parser p xs@ returns a parser parsing @xs@ entirely with @p@, -- applying 'setFilePosToNextNode' in case of success, -- or resetting 'P.statePos' and re-raising the exception in case of error. parser :: Ord err => P.Parsec err XMLs a -> XMLs -> P.Parsec err XMLs a parser p xs = do P.State{P.statePos=pos} <- P.getParserState case runParser (p <* P.eof) pos xs of Left (P.TrivialError statePos un ex) -> do s <- P.getParserState P.setParserState s{P.statePos} P.failure un ex Left (P.FancyError statePos errs) -> do s <- P.getParserState P.setParserState s{P.statePos} P.fancyFailure errs Right a -> a <$ setFilePosToNextNode -- | @stateParser p xs@ returns a stateful parser parsing @xs@ with @p@, -- applying 'setFilePosToNextNode' in case of success. stateParser :: Ord err => S.StateT st (P.Parsec err XMLs) a -> XMLs -> S.StateT st (P.Parsec err XMLs) a stateParser p xs = do st <- S.get P.State{P.statePos=pos} <- P.getParserState case runParser (S.runStateT (p <* P.eof) st) pos xs of Left (P.TrivialError statePos un ex) -> do -- NOTE: just re-raising exception. s <- P.getParserState P.setParserState s{P.statePos} P.failure un ex Left (P.FancyError statePos errs) -> do -- NOTE: just re-raising exception. s <- P.getParserState P.setParserState s{P.statePos} P.fancyFailure errs Right (a, st') -> do S.put st' a <$ setFilePosToNextNode