1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE OverloadedStrings #-}
6 {-# LANGUAGE Rank2Types #-}
7 {-# LANGUAGE TypeFamilies #-}
8 module Language.Symantic.XML.Read.Parser where
10 import Control.Applicative (Applicative(..), Alternative(..))
11 import Control.Monad (Monad(..))
13 import Data.Char (Char)
14 import Data.Default.Class (Default(..))
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Function (($), (.))
18 import Data.Functor (Functor, (<$>))
19 import Data.Functor.Identity (Identity(..))
20 import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
21 import Data.Maybe (Maybe(..), fromMaybe)
22 import Data.Ord (Ord(..))
23 import Data.Proxy (Proxy(..))
24 import Data.Semigroup (Semigroup(..))
25 import Data.Sequence (Seq)
26 import Data.String (IsString)
27 import Data.Tuple (snd)
28 import Prelude (Int, Integer, Num(..), fromIntegral)
29 import System.FilePath (FilePath)
30 import Text.Show (Show(..), showChar, showString, showParen)
31 import qualified Control.Monad.Trans.Reader as R
32 import qualified Data.HashMap.Strict as HM
33 import qualified Data.Set as Set
34 import qualified Data.Text as Text
35 import qualified Data.Text.Lazy as TL
36 import qualified Data.TreeSeq.Strict as TS
37 import qualified Text.Megaparsec as P
38 import qualified Text.Megaparsec.Char as P
40 import Language.Symantic.XML.Document
43 type XML = TS.Tree (Sourced FileSource Node)
47 -- | Convenient alias.
50 R.ReaderT Reader (P.ParsecT e s Identity) a
57 , IsString (P.Tokens s)
58 , P.ShowErrorComponent e
63 { reader_source :: !FileSource
64 , reader_ns_scope :: !(HM.HashMap NCName Namespace)
65 , reader_ns_default :: !Namespace
67 instance Default Reader where
69 { reader_source = pure def
70 , reader_ns_scope = HM.fromList
72 , ("xmlns", xmlns_xmlns)
74 , reader_ns_default = ""
82 } deriving (Eq, Ord, Functor)
83 instance (Show src, Show a) => Show (Sourced src a) where
84 showsPrec p Sourced{..} =
86 showsPrec 11 unSourced .
87 showString " @" . showsPrec 10 source
88 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
89 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
90 Sourced (FileRange fx bx ey :| lx) $
91 x<>fromPad (FilePos lines columns)<>y
93 lines = filePos_line by - filePos_line ex
94 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
96 p_Sourced :: Parser e s a -> Parser e s (Sourced FileSource a)
98 Reader{reader_source} <- R.ask
99 beginPos :| _ <- P.statePos <$> P.getParserState
101 fileRange_end <- p_FilePos
102 let fileRange = FileRange
103 { fileRange_file = P.sourceName beginPos
106 (P.unPos $ P.sourceLine beginPos)
107 (P.unPos $ P.sourceColumn beginPos)
110 return $ Sourced (setSource fileRange reader_source) a
112 setSource :: FileRange -> FileSource -> FileSource
113 setSource fileRange (_curr:|next) = fileRange :| next
115 -- | Like 'p_Sourced' but uncoupled for more flexibility.
116 p_SourcedBegin :: Parser e s a -> Parser e s a
117 p_SourcedBegin pa = do
118 currPos :| _ <- P.statePos <$> P.getParserState
119 let fileRange_begin = FilePos
120 (P.unPos $ P.sourceLine currPos)
121 (P.unPos $ P.sourceColumn currPos)
122 let fileRange = FileRange
123 { fileRange_file = P.sourceName currPos
125 , fileRange_end = fileRange_begin
127 (`R.local` pa) $ \ro@Reader{..} ->
128 ro{ reader_source = setSource fileRange reader_source }
130 -- | Only to be used within a 'p_SourcedBegin'.
131 p_SourcedEnd :: Parser e s (a -> Sourced FileSource a)
133 fileRange_end <- p_FilePos
136 (\(curr:|path) -> curr{fileRange_end}:|path)
139 -- | Wrapper around |P.runParser'|
140 -- to use given 'Sourced' as starting position.
141 runParserOnSourced ::
142 Parsable e StreamSourced a =>
143 Parser e StreamSourced a ->
144 Sourced FileSource TL.Text ->
145 Either (P.ParseError (P.Token StreamSourced) e) a
146 runParserOnSourced p (Sourced (FileRange inp bp _ep :| path) s) =
148 P.runParser' (R.runReaderT p ro <* P.eof)
150 { P.stateInput = StreamSourced s
151 , P.statePos = pure $ P.SourcePos inp (P.mkPos $ filePos_line bp) indent
152 , P.stateTabWidth = indent
153 , P.stateTokensProcessed = 0
156 indent = P.mkPos $ filePos_column bp
157 ro = def{ reader_source = fromMaybe (pure def) $ nonEmpty path }
159 -- ** Class 'NoSource'
160 class NoSource src where
162 instance NoSource FileSource where
163 noSource = noSource :| []
164 instance NoSource FileRange where
165 noSource = FileRange "" filePos1 filePos1
167 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
168 mempty = sourced0 mempty
172 notSourced :: NoSource src => a -> Sourced src a
173 notSourced = Sourced noSource
175 -- ** Type 'StreamSourced'
176 -- | Wrap 'TL.Text' to have a 'P.Stream' instance
177 -- whose 'P.advance1' method abuses the tab width state
178 -- to instead pass the line indent.
179 -- This in order to report correct 'P.SourcePos'
180 -- when parsing a 'Sourced' containing newlines.
181 newtype StreamSourced = StreamSourced { unStreamSourced :: TL.Text }
182 deriving (IsString,Eq,Ord)
183 instance P.Stream StreamSourced where
184 type Token StreamSourced = Char
185 type Tokens StreamSourced = TL.Text
186 take1_ (StreamSourced t) = (StreamSourced <$>) <$> P.take1_ t
187 takeN_ n (StreamSourced t) = (StreamSourced <$>) <$> P.takeN_ n t
188 takeWhile_ f (StreamSourced t) = StreamSourced <$> P.takeWhile_ f t
189 tokensToChunk _s = P.tokensToChunk (Proxy::Proxy TL.Text)
190 chunkToTokens _s = P.chunkToTokens (Proxy::Proxy TL.Text)
191 chunkLength _s = P.chunkLength (Proxy::Proxy TL.Text)
192 advance1 _s indent (P.SourcePos n line col) c =
194 '\n' -> P.SourcePos n (line <> P.pos1) indent
195 _ -> P.SourcePos n line (col <> P.pos1)
196 advanceN s indent = TL.foldl' (P.advance1 s indent)
198 -- * Type 'FileSource'
199 type FileSource = NonEmpty FileRange
201 -- ** Type 'FileRange'
204 { fileRange_file :: !FilePath
205 , fileRange_begin :: !FilePos
206 , fileRange_end :: !FilePos
208 instance Default FileRange where
209 def = FileRange "" filePos1 filePos1
210 instance Show FileRange where
211 showsPrec _p FileRange{..} =
212 showString fileRange_file .
213 showChar '#' . showsPrec 10 fileRange_begin .
214 showChar '-' . showsPrec 10 fileRange_end
216 -- *** Type 'FilePos'
217 -- | Absolute text file position.
218 data FilePos = FilePos
219 { filePos_line :: {-# UNPACK #-} !LineNum
220 , filePos_column :: {-# UNPACK #-} !ColNum
222 instance Default FilePos where
224 instance Show FilePos where
225 showsPrec _p FilePos{..} =
226 showsPrec 11 filePos_line .
228 showsPrec 11 filePos_column
231 filePos1 = FilePos 1 1
233 p_FilePos :: Parser e s FilePos
235 pos :| _ <- P.statePos <$> P.getParserState
236 return $ FilePos (P.unPos $ P.sourceLine pos) (P.unPos $ P.sourceColumn pos)
238 -- **** Type 'LineNum'
241 -- **** Type 'ColNum'
244 -- **** Class 'FromPad'
245 class FromPad a where
246 fromPad :: FilePos -> a
247 instance FromPad Text.Text where
248 fromPad FilePos{..} =
249 Text.replicate filePos_line "\n" <>
250 Text.replicate filePos_column " "
251 instance FromPad TL.Text where
252 fromPad FilePos{..} =
253 TL.replicate (fromIntegral filePos_line) "\n" <>
254 TL.replicate (fromIntegral filePos_column) " "
258 = Error_CharRef_invalid Integer
259 -- ^ Well-formedness constraint: Legal Character.
261 -- Characters referred to using character references MUST match the production for Char.
262 | Error_EntityRef_unknown Name
263 -- ^ Well-formedness constraint: Entity Declared
265 -- In a document without any DTD, a document with only an internal DTD
266 -- subset which contains no parameter entity references, or a document
267 -- with " standalone='yes' ", for an entity reference that does not occur
268 -- within the external subset or a parameter entity, the Name given in the
269 -- entity reference MUST match that in an entity declaration that does not
270 -- occur within the external subset or a parameter entity, except that
271 -- well-formed documents need not declare any of the following entities:
272 -- amp, lt, gt, apos, quot. The declaration of a general entity MUST
273 -- precede any reference to it which appears in a default value in an
274 -- attribute-list declaration.
276 -- Note that non-validating processors are not obligated to read and
277 -- process entity declarations occurring in parameter entities or in the
278 -- external subset; for such documents, the rule that an entity must be
279 -- declared is a well-formedness constraint only if standalone='yes'.
280 | Error_Closing_tag_unexpected QName QName
281 -- ^ Well-formedness constraint: Element Type Match.
283 -- The Name in an element's end-tag MUST match the element type in the start-tag.
284 | Error_Attribute_collision QName
285 -- ^ Well-formedness constraint: Unique Att Spec.
287 -- An attribute name MUST NOT appear more than once in the same start-tag or empty-element tag.
288 | Error_PI_reserved PName
289 -- ^ The target names " XML ", " xml ", and so on are reserved for standardization.
290 | Error_Namespace_prefix_unknown NCName
291 -- ^ Namespace constraint: Prefix Declared
293 -- The namespace prefix, unless it is xml or xmlns, MUST have been declared in a namespace declaration attribute in either the start-tag of the element where the prefix is used or in an ancestor element (i.e., an element in whose content the prefixed markup occurs).
294 | Error_Namespace_empty NCName
295 -- ^ Namespace constraint: No Prefix Undeclaring
297 -- In a namespace declaration for a prefix (i.e., where the NSAttName is a PrefixedAttName), the attribute value MUST NOT be empty.
298 | Error_Namespace_reserved Namespace
299 | Error_Namespace_reserved_prefix NCName
300 -- ^ Namespace constraint: Reserved Prefixes and Namespace Names
302 -- The prefix xml is by definition bound to the namespace name
303 -- http://www.w3.org/XML/1998/namespace. It MAY, but need not, be
304 -- declared, and MUST NOT be bound to any other namespace name. Other
305 -- prefixes MUST NOT be bound to this namespace name, and it MUST NOT be
306 -- declared as the default namespace.
308 -- The prefix xmlns is used only to declare namespace bindings and is by
309 -- definition bound to the namespace name http://www.w3.org/2000/xmlns/.
310 -- It MUST NOT be declared . Other prefixes MUST NOT be bound to this
311 -- namespace name, and it MUST NOT be declared as the default namespace.
312 -- Element names MUST NOT have the prefix xmlns.
314 -- All other prefixes beginning with the three-letter sequence x, m, l, in
315 -- any case combination, are reserved. This means that:
317 -- - users SHOULD NOT use them except as defined by later specifications
318 -- - processors MUST NOT treat them as fatal errors.
319 deriving (Eq,Ord,Show)
320 instance P.ShowErrorComponent Error where
321 showErrorComponent = show
324 p_error :: e -> Parser e s a
325 p_error = P.fancyFailure . Set.singleton . P.ErrorCustom
327 p_quoted :: P.Tokens s ~ TL.Text => (Char -> Parser e s a) -> Parser e s a
329 P.between (P.char '"') (P.char '"') (p '"') <|>
330 P.between (P.char '\'') (P.char '\'') (p '\'')
332 p_until :: P.Tokens s ~ TL.Text => (Char -> Bool) -> (Char, TL.Text) -> Parser e s TL.Text
333 p_until content (end, end_) =
334 (TL.concat <$>) $ P.many $
335 P.takeWhile1P Nothing (\c -> content c && c /= end) <|>
336 P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_))
338 p_until1 :: P.Tokens s ~ TL.Text => (Char -> Bool) -> (Char, TL.Text) -> Parser e s TL.Text
339 p_until1 content (end, end_) =
340 (TL.concat <$>) $ P.some $
341 P.takeWhile1P Nothing (\c -> content c && c /= end) <|>
342 P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_))