]> Git — Sourcephile - haskell/symantic-xml.git/blob - Language/Symantic/XML/Parser.hs
XML: fix union of EscapedText
[haskell/symantic-xml.git] / Language / Symantic / XML / Parser.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE ScopedTypeVariables #-}
4 {-# LANGUAGE TypeFamilies #-}
5 {-# OPTIONS_GHC -fno-warn-orphans #-}
6 module Language.Symantic.XML.Parser where
7
8 import Control.Applicative (Applicative(..))
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Either (Either(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.))
14 import Data.Functor ((<$>), (<$))
15 import Data.Functor.Identity (Identity(..))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ord(..))
20 import Data.Proxy (Proxy(..))
21 import Data.Semigroup (Semigroup(..))
22 import Data.Sequence (ViewL(..))
23 import Data.String (String)
24 import Data.Tuple (snd)
25 import Text.Show (Show(..))
26 import qualified Control.Monad.Trans.State as S
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Text.Megaparsec as P
30
31 import Language.Symantic.XML.Document (XML, XMLs)
32 import qualified Language.Symantic.XML.Document as XML
33
34 -- | Whether the given 'XML.Node' must be ignored by the parser.
35 isIgnoredNode :: XML.Node -> Bool
36 isIgnoredNode = \case
37 XML.NodeComment{} -> True
38 XML.NodePI{} -> True
39 XML.NodeCDATA{} -> True
40 _ -> False
41
42 instance P.Stream XMLs where
43 type Token XMLs = XML
44 type Tokens XMLs = XMLs
45 take1_ s =
46 case Seq.viewl s of
47 EmptyL -> Nothing
48 t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts
49 | isIgnoredNode n -> P.take1_ ts
50 | otherwise -> Just (t, ts)
51 positionAt1 _s pos (XML.Tree (XML.Sourced (XML.FileRange{XML.fileRange_begin=XML.FilePos l c}:|_) _n) _ts) =
52 pos{ P.sourceLine = P.mkPos l
53 , P.sourceColumn = P.mkPos c }
54 positionAtN s pos ts =
55 case Seq.viewl ts of
56 t :< _ -> P.positionAt1 s pos t
57 EmptyL -> pos
58 advance1 _s _indent pos (XML.Tree (XML.Sourced (XML.FileRange{XML.fileRange_end=XML.FilePos l c}:|_) _n) _ts) =
59 -- WARNING: the end of a 'FileRange' is not necessarily
60 -- the beginning of the next 'FileRange'.
61 pos{ P.sourceLine = P.mkPos l
62 , P.sourceColumn = P.mkPos c }
63 advanceN s = foldl' . P.advance1 s
64 takeN_ n s | n <= 0 = Just (mempty, s)
65 | null s = Nothing
66 | otherwise =
67 let (ns,rs) = Seq.splitAt n s in
68 let (ko,ok) = Seq.partition (isIgnoredNode . XML.unSourced . XML.unTree) ns in
69 case P.takeN_ (Seq.length ko) rs of
70 Nothing -> Just (ok, rs)
71 Just (ns',rs') -> Just (ok<>ns', rs')
72 tokensToChunk _s = Seq.fromList
73 chunkToTokens _s = toList
74 chunkLength _s = Seq.length
75 takeWhile_ = Seq.spanl
76
77 -- | Adjust the current 'P.SourcePos'
78 -- to be the begining of the following-sibling 'XML' node
79 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
80 -- and thus makes useful error messages.
81 --
82 -- This is needed because the end of a 'FileRange'
83 -- is not necessarily the begin of the next 'FileRange'.
84
85 -- type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
86
87 setFilePosToNextNode :: P.MonadParsec e XMLs m => m ()
88 setFilePosToNextNode = do
89 P.State
90 { P.stateInput = inp
91 , P.statePos = pos :| _
92 } <- P.getParserState
93 case Seq.viewl inp of
94 EmptyL -> return ()
95 t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XMLs) pos t
96
97 -- | @runParserT st pos p xs@ runs a 'Parser' @p@ to parse @xs@,
98 -- using state @st@ from position @pos@.
99 runParserT ::
100 Ord err =>
101 Monad m =>
102 P.ParsecT err XMLs m a ->
103 NonEmpty P.SourcePos ->
104 XMLs ->
105 m (Either (P.ParseError (P.Token XMLs) err) a)
106 runParserT p pos inp =
107 snd <$>
108 P.runParserT' p P.State
109 { P.stateInput = inp
110 , P.statePos =
111 case Seq.viewl inp of
112 EmptyL -> pos
113 XML.Tree (XML.Sourced ss _) _ :< _ ->
114 (<$> ss) $ \XML.FileRange{fileRange_begin=bp, fileRange_file} ->
115 P.SourcePos fileRange_file
116 (P.mkPos $ XML.filePos_line bp)
117 (P.mkPos $ XML.filePos_column bp)
118 , P.stateTabWidth = P.pos1
119 , P.stateTokensProcessed = 0
120 }
121
122 -- | Like 'runParser', but using 'Identity' as the inner-monad of 'P.ParsecT'.
123 runParser ::
124 Ord err =>
125 P.Parsec err XMLs a ->
126 NonEmpty P.SourcePos ->
127 XMLs ->
128 Either (P.ParseError (P.Token XMLs) err) a
129 runParser p pos xs = runIdentity $ runParserT p pos xs
130
131 -- | @parser p xs@ returns a parser parsing @xs@ entirely with @p@,
132 -- applying 'setFilePosToNextNode' in case of success,
133 -- or resetting 'P.statePos' and re-raising the exception in case of error.
134 parser :: Ord err => P.Parsec err XMLs a -> XMLs -> P.Parsec err XMLs a
135 parser p xs = do
136 P.State{P.statePos=pos} <- P.getParserState
137 case runParser (p <* P.eof) pos xs of
138 Left (P.TrivialError statePos un ex) -> do
139 s <- P.getParserState
140 P.setParserState s{P.statePos}
141 P.failure un ex
142 Left (P.FancyError statePos errs) -> do
143 s <- P.getParserState
144 P.setParserState s{P.statePos}
145 P.fancyFailure errs
146 Right a -> a <$ setFilePosToNextNode
147
148 -- | @stateParser p xs@ returns a stateful parser parsing @xs@ with @p@,
149 -- applying 'setFilePosToNextNode' in case of success.
150 stateParser ::
151 Ord err =>
152 S.StateT st (P.Parsec err XMLs) a ->
153 XMLs ->
154 S.StateT st (P.Parsec err XMLs) a
155 stateParser p xs = do
156 st <- S.get
157 P.State{P.statePos=pos} <- P.getParserState
158 case runParser (S.runStateT (p <* P.eof) st) pos xs of
159 Left (P.TrivialError statePos un ex) -> do
160 -- NOTE: just re-raising exception.
161 s <- P.getParserState
162 P.setParserState s{P.statePos}
163 P.failure un ex
164 Left (P.FancyError statePos errs) -> do
165 -- NOTE: just re-raising exception.
166 s <- P.getParserState
167 P.setParserState s{P.statePos}
168 P.fancyFailure errs
169 Right (a, st') -> do
170 S.put st'
171 a <$ setFilePosToNextNode