{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Language.Symantic.XML.Parser where import Control.Applicative (Applicative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Either (Either(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Functor ((<$>)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (ViewL(..)) import Prelude (error) import Text.Show (Show(..)) import qualified Data.List as List import qualified Data.List.NonEmpty as NonEmpty import qualified Data.Sequence as Seq import qualified Text.Megaparsec as P import Language.Symantic.XML.Document (XML, XMLs) import qualified Language.Symantic.XML.Document as XML -- | Whether the given 'XML.Node' must be ignored by the parser. isIgnoredNode :: XML.Node -> Bool isIgnoredNode = \case XML.NodeComment{} -> True XML.NodePI{} -> True XML.NodeCDATA{} -> True _ -> False instance Ord src => P.Stream (XMLs src) where type Token (XMLs src) = XML src type Tokens (XMLs src) = XMLs src take1_ s = case Seq.viewl s of EmptyL -> Nothing t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts | isIgnoredNode n -> P.take1_ ts | otherwise -> Just (t, ts) takeN_ n s | n <= 0 = Just (mempty, s) | null s = Nothing | otherwise = let (ns,rs) = Seq.splitAt n s in let (ko,ok) = Seq.partition (isIgnoredNode . XML.unSourced . XML.unTree) ns in case P.takeN_ (Seq.length ko) rs of Nothing -> Just (ok, rs) Just (ns',rs') -> Just (ok<>ns', rs') tokensToChunk _s = Seq.fromList chunkToTokens _s = toList chunkLength _s = Seq.length takeWhile_ = Seq.spanl -- | NOTE: unimplemented: useless since each 'XML.Node' is annotated with its 'FileSource'. reachOffset = error "[BUG] P.Stream XMLs: reachOffset is not helpful, please use annotated source locations" -- | NOTE: useless since each 'XML.Node' is annotated with its 'FileSource'. reachOffsetNoLine = error "[BUG] P.Stream XMLs: reachOffsetNoLine is not helpful, please use annotated source locations" showTokens _s toks = List.intercalate ", " $ toList $ showTree <$> toks where showTree (XML.Tree (XML.Sourced _src a) _ts) = case a of XML.NodeElem n -> "element "<>show n<>"" XML.NodeAttr n -> "attribute "<>show n<>"" XML.NodeText _t -> "text" XML.NodeComment _c -> "comment" XML.NodePI n _t -> "processing-instruction "<>show n<>"" XML.NodeCDATA _t -> "cdata" -- | @subParser p xs@ returns a parser parsing @xs@ entirely with @p@, -- updating 'P.stateOffset' and re-raising any exception. subParser :: Ord err => Ord src => P.Parsec err (XMLs src) a -> XMLs src -> P.Parsec err (XMLs src) a subParser p xs = do st <- P.getParserState let (st', res) = P.runParser' (p <* P.eof) st { P.stateInput = xs , P.stateOffset = P.stateOffset st } P.updateParserState (\s -> s{P.stateOffset = P.stateOffset st'}) case res of Right a -> return a Left (P.ParseErrorBundle errs _) -> case NonEmpty.head errs of P.TrivialError _o us es -> P.failure us es P.FancyError _o es -> P.fancyFailure es