]> Git — Sourcephile - haskell/symantic-xml.git/blob - Language/Symantic/XML/Parser.hs
Add more XML test files.
[haskell/symantic-xml.git] / Language / Symantic / XML / Parser.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.Symantic.XML.Parser where
8
9 import Control.Applicative (Applicative(..))
10 import Control.Monad (Monad(..))
11 import Data.Bool
12 import Data.Either (Either(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), (<$))
16 import Data.Functor.Identity (Identity(..))
17 import Data.List.NonEmpty (NonEmpty(..))
18 import Data.Maybe (Maybe(..))
19 import Data.Monoid (Monoid(..))
20 import Data.Ord (Ord(..))
21 import Data.Proxy (Proxy(..))
22 import Data.Semigroup (Semigroup(..))
23 import Data.Sequence (ViewL(..))
24 import Data.String (String)
25 import Data.Tuple (snd)
26 import Text.Show (Show(..))
27 import qualified Control.Monad.Trans.State as S
28 import qualified Data.List as List
29 import qualified Data.Sequence as Seq
30 import qualified Text.Megaparsec as P
31
32 import Language.Symantic.XML.Document (XML, XMLs)
33 import qualified Language.Symantic.XML.Document as XML
34
35 -- | Whether the given 'XML.Node' must be ignored by the parser.
36 isIgnoredNode :: XML.Node -> Bool
37 isIgnoredNode = \case
38 XML.NodeComment{} -> True
39 XML.NodePI{} -> True
40 XML.NodeCDATA{} -> True
41 _ -> False
42
43 instance P.Stream XMLs where
44 type Token XMLs = XML
45 type Tokens XMLs = XMLs
46 take1_ s =
47 case Seq.viewl s of
48 EmptyL -> Nothing
49 t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts
50 | isIgnoredNode n -> P.take1_ ts
51 | otherwise -> Just (t, ts)
52 positionAt1 _s pos (XML.Tree (XML.Sourced (XML.FileRange{XML.fileRange_begin=XML.FilePos l c}:|_) _n) _ts) =
53 pos{ P.sourceLine = P.mkPos l
54 , P.sourceColumn = P.mkPos c }
55 positionAtN s pos ts =
56 case Seq.viewl ts of
57 t :< _ -> P.positionAt1 s pos t
58 EmptyL -> pos
59 advance1 _s _indent pos (XML.Tree (XML.Sourced (XML.FileRange{XML.fileRange_end=XML.FilePos l c}:|_) _n) _ts) =
60 -- WARNING: the end of a 'FileRange' is not necessarily
61 -- the beginning of the next 'FileRange'.
62 pos{ P.sourceLine = P.mkPos l
63 , P.sourceColumn = P.mkPos c }
64 advanceN s = foldl' . P.advance1 s
65 takeN_ n s | n <= 0 = Just (mempty, s)
66 | null s = Nothing
67 | otherwise =
68 let (ns,rs) = Seq.splitAt n s in
69 let (ko,ok) = Seq.partition (isIgnoredNode . XML.unSourced . XML.unTree) ns in
70 case P.takeN_ (Seq.length ko) rs of
71 Nothing -> Just (ok, rs)
72 Just (ns',rs') -> Just (ok<>ns', rs')
73 tokensToChunk _s = Seq.fromList
74 chunkToTokens _s = toList
75 chunkLength _s = Seq.length
76 takeWhile_ = Seq.spanl
77 instance P.ShowToken XML where
78 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
79 where
80 showTree :: XML -> String
81 showTree (XML.Tree a _ts) =
82 showCell a $ \case
83 XML.NodeElem n -> "<"<>show n
84 XML.NodeAttr n -> show n<>"="
85 XML.NodeText _t -> "text"
86 XML.NodeComment _c -> "<!--"
87 XML.NodePI n _t -> "<?"<>show n
88 XML.NodeCDATA _t -> "<[CDATA[["
89
90 showCell (XML.Sourced path@(XML.FileRange{XML.fileRange_file} :| _) a) f =
91 if null fileRange_file
92 then f a
93 else f a <> foldMap (\p -> "\n in "<>show p) path
94
95 -- | Adjust the current 'P.SourcePos'
96 -- to be the begining of the following-sibling 'XML' node
97 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
98 -- and thus makes useful error messages.
99 --
100 -- This is needed because the end of a 'FileRange'
101 -- is not necessarily the begin of the next 'FileRange'.
102
103 -- type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
104
105 setFilePosToNextNode :: P.MonadParsec e XMLs m => m ()
106 setFilePosToNextNode = do
107 P.State
108 { P.stateInput = inp
109 , P.statePos = pos :| _
110 } <- P.getParserState
111 case Seq.viewl inp of
112 EmptyL -> return ()
113 t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XMLs) pos t
114
115 -- | @runParserT st pos p xs@ runs a 'Parser' @p@ to parse @xs@,
116 -- using state @st@ from position @pos@.
117 runParserT ::
118 Ord err =>
119 Monad m =>
120 P.ParsecT err XMLs m a ->
121 NonEmpty P.SourcePos ->
122 XMLs ->
123 m (Either (P.ParseError (P.Token XMLs) err) a)
124 runParserT p pos inp =
125 snd <$>
126 P.runParserT' p P.State
127 { P.stateInput = inp
128 , P.statePos =
129 case Seq.viewl inp of
130 EmptyL -> pos
131 XML.Tree (XML.Sourced ss _) _ :< _ ->
132 (<$> ss) $ \XML.FileRange{fileRange_begin=bp, fileRange_file} ->
133 P.SourcePos fileRange_file
134 (P.mkPos $ XML.filePos_line bp)
135 (P.mkPos $ XML.filePos_column bp)
136 , P.stateTabWidth = P.pos1
137 , P.stateTokensProcessed = 0
138 }
139
140 -- | Like 'runParser', but using 'Identity' as the inner-monad of 'P.ParsecT'.
141 runParser ::
142 Ord err =>
143 P.Parsec err XMLs a ->
144 NonEmpty P.SourcePos ->
145 XMLs ->
146 Either (P.ParseError (P.Token XMLs) err) a
147 runParser p pos xs = runIdentity $ runParserT p pos xs
148
149 -- | @parser p xs@ returns a parser parsing @xs@ entirely with @p@,
150 -- applying 'setFilePosToNextNode' in case of success,
151 -- or resetting 'P.statePos' and re-raising the exception in case of error.
152 parser :: Ord err => P.Parsec err XMLs a -> XMLs -> P.Parsec err XMLs a
153 parser p xs = do
154 P.State{P.statePos=pos} <- P.getParserState
155 case runParser (p <* P.eof) pos xs of
156 Left (P.TrivialError statePos un ex) -> do
157 s <- P.getParserState
158 P.setParserState s{P.statePos}
159 P.failure un ex
160 Left (P.FancyError statePos errs) -> do
161 s <- P.getParserState
162 P.setParserState s{P.statePos}
163 P.fancyFailure errs
164 Right a -> a <$ setFilePosToNextNode
165
166 -- | @stateParser p xs@ returns a stateful parser parsing @xs@ with @p@,
167 -- applying 'setFilePosToNextNode' in case of success.
168 stateParser ::
169 Ord err =>
170 S.StateT st (P.Parsec err XMLs) a ->
171 XMLs ->
172 S.StateT st (P.Parsec err XMLs) a
173 stateParser p xs = do
174 st <- S.get
175 P.State{P.statePos=pos} <- P.getParserState
176 case runParser (S.runStateT (p <* P.eof) st) pos xs of
177 Left (P.TrivialError statePos un ex) -> do
178 -- NOTE: just re-raising exception.
179 s <- P.getParserState
180 P.setParserState s{P.statePos}
181 P.failure un ex
182 Left (P.FancyError statePos errs) -> do
183 -- NOTE: just re-raising exception.
184 s <- P.getParserState
185 P.setParserState s{P.statePos}
186 P.fancyFailure errs
187 Right (a, st') -> do
188 S.put st'
189 a <$ setFilePosToNextNode