import Data.String (String)
import Text.Show (Show(..))
import qualified Data.Sequence as Seq
+import qualified Data.Text.Lazy as TL
import qualified Language.Symantic.XML as XML
, Sym_Rule repr
, Sym_Interleaved repr
) => Sym_RNC repr where
- namespace :: Maybe XML.NCName -> XML.Namespace -> repr ()
- element :: XML.QName -> repr a -> repr a
- attribute :: XML.QName -> repr a -> repr a
- any :: repr ()
- anyElem :: XML.Namespace -> (XML.NCName -> repr a) -> repr a
- text :: repr XML.Text
- fail :: repr a
- try :: repr a -> repr a
- option :: a -> repr a -> repr a
- optional :: repr a -> repr (Maybe a)
- choice :: [repr a] -> repr a
- intermany :: [repr a] -> repr [a]
+ namespace :: Maybe XML.NCName -> XML.Namespace -> repr ()
+ element :: XML.QName -> repr a -> repr a
+ attribute :: XML.QName -> repr a -> repr a
+ any :: repr ()
+ anyElem :: XML.Namespace -> (XML.NCName -> repr a) -> repr a
+ escapedText :: repr XML.EscapedText
+ text :: repr TL.Text
+ text = XML.unescapeText <$> escapedText
+ fail :: repr a
+ try :: repr a -> repr a
+ option :: a -> repr a -> repr a
+ optional :: repr a -> repr (Maybe a)
+ choice :: [repr a] -> repr a
+ intermany :: [repr a] -> repr [a]
intermany = many . choice . (try <$>)
manySeq :: repr a -> repr (Seq a)
manySeq r = Seq.fromList <$> many r
rule _n = id
arg :: String -> repr ()
-
-- ** Type 'RuleMode'
data RuleMode
= RuleMode_Body -- ^ Request to generate the body of the rule.
if (`all` xs) $ \case
XML.Tree (XML.unSourced -> XML.NodeText txt) _ts ->
all (\case
- XML.TextLexemePlain t -> TL.all Char.isSpace t
+ XML.EscapedPlain t -> TL.all Char.isSpace t
_ -> False) txt
_ -> True
then (`Seq.filter` xs) $ \case
ex = Set.singleton $ P.Tokens $ pure expected
expected = XML.Tree0 (XML.notSourced $ XML.NodeComment "")
-}
- text = do
+ escapedText = do
P.token check (Just expected)
<* XML.setFilePosToNextNode
where
parserElement :: Ord e => XML.QName -> P.Parsec e XMLs a -> XMLs -> P.Parsec e XMLs a
parserElement _n = XML.parser
-{-
-
--- * Type 'State'
-data State = State
- { state_posXML :: XML.Pos
- , state_source :: XML.FileSource
- -- ^ Unfortunately Megaparsec's 'P.statePos'
- -- is not a good fit to encode 'XML.Source'.
- } deriving (Eq,Show)
-instance Default State where
- def = State
- { state_posXML = def
- , state_source = def
- }
-
--- * Type 'Parser'
-type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
-
-instance RNC.Sym_Rule Parser where
- -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
- rule _n = id
-instance RNC.Sym_RNC Parser where
- {-
- none = P.label "none" $ P.eof
- -}
- fail = P.label "fail" $ P.failure Nothing mempty
- any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
- anyElem p = P.label "anyElem" $ do
- XML.Sourced state_source (n,ts) <- P.token check $ Just expected
- parserElement n (p n) (XML.Sourced state_source ts)
- where
- expected = XML.Tree (XML.notSourced $ XML.NodeElem "*") mempty
- check (XML.Tree cell@(XML.unSourced -> XML.NodeElem e) ts) = Right $ (e,ts) <$ cell
- check t = Left
- ( Just $ P.Tokens $ pure t
- , Set.singleton $ P.Tokens $ pure expected )
- element n p = do
- ts <- P.token check $ Just expected
- parserElement n p ts
- where
- expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty
- check (XML.Tree cell@(XML.unSourced -> XML.NodeElem e) ts) | e == n = Right (ts <$ cell)
- check t = Left
- ( Just $ P.Tokens $ pure t
- , Set.singleton $ P.Tokens $ pure expected )
- attribute n p = do
- v <- P.token check $ Just expected
- stateParser p $ Seq.singleton $ Tree0 v
- where
- expected = Tree0 (XML.notSourced $ XML.NodeAttr n "")
- check (XML.Tree0 cell@(XML.unSourced -> XML.NodeAttr k v)) | k == n =
- Right $ XML.NodeText v <$ cell
- check t = Left
- ( Just $ P.Tokens $ pure t
- , Set.singleton $ P.Tokens $ pure expected )
- {-
- comment = do
- s <- P.getInput
- case Seq.viewl s of
- XML.Tree0 (XML.unSourced -> XML.NodeComment c) :< ts -> do
- P.setInput ts
- c <$ setFilePosToNextNode
- t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
- EmptyL -> P.failure Nothing ex
- where
- ex = Set.singleton $ P.Tokens $ pure expected
- expected = Tree0 (XML.notSourced $ XML.NodeComment "")
- -}
- text = do
- P.token check (Just expected)
- <* setFilePosToNextNode
- where
- expected = Tree0 (XML.notSourced $ XML.NodeText "")
- check (XML.Tree0 (XML.unSourced -> XML.NodeText t)) = Right t
- check t = Left
- ( Just $ P.Tokens $ pure t
- , Set.singleton $ P.Tokens $ pure expected )
- optional = P.optional
- option = P.option
- choice = P.choice
- try = P.try
-
-parserElement :: XML.Name -> Parser a -> XML.Sourced XMLs -> Parser a
-parserElement n p (XML.Sourced state_source ts) = do
- let mayNameOrFigureName
- | n == "aside" = Nothing
- -- NOTE: skip aside.
- | n == "figure"
- -- NOTE: special case renaming the current XML.XmlPos
- -- using the @type attribute to have positions like this:
- -- section1.Quote1
- -- section1.Example1
- -- section1.Quote2
- -- instead of:
- -- section1.figure1
- -- section1.figure2
- -- section1.figure3
- , Just ty <- getFirst $ (`foldMap` ts) $ \case
- XML.Tree0 (XML.unSourced -> XML.NodeAttr "type" ty) -> First $ Just ty
- _ -> First Nothing
- = Just $ XML.qName $ ty
- | otherwise = Just n
- case mayNameOrFigureName of
- Nothing -> do
- st <- S.get
- S.put st{state_source}
- res <- stateParser p ts
- S.put st
- return res
- Just nameOrFigureName -> do
- st@State{state_posXML} <- S.get
- let incrPrecedingSibling name =
- maybe (Nat1 1) succNat1 $
- Map.lookup name $
- XML.pos_precedingSiblings state_posXML
- S.put State
- { state_posXML = state_posXML
- -- NOTE: in children, push current name incremented on ancestors
- -- and reset preceding siblings.
- { XML.pos_precedingSiblings = mempty
- , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
- , XML.pos_ancestorsWithFigureNames =
- XML.pos_ancestorsWithFigureNames state_posXML |>
- ( nameOrFigureName
- , incrPrecedingSibling nameOrFigureName )
- }
- , state_source
- }
- res <- stateParser p ts
- S.put st
- { state_posXML = state_posXML
- -- NOTE: after current, increment current name
- -- and reset ancestors.
- { XML.pos_precedingSiblings =
- (if n == nameOrFigureName then id
- else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
- Map.insertWith (const succNat1) n (Nat1 1) $
- XML.pos_precedingSiblings state_posXML
- }
- }
- return res
-
-type instance RNC.Perm Parser = P.PermParser XMLs Parser
-instance RNC.Sym_Interleaved Parser where
- interleaved = P.makePermParser
- (<$$>) = (P.<$$>)
- (<||>) = (P.<||>)
- (<$?>) = (P.<$?>)
- (<|?>) = (P.<|?>)
- f <$*> a = f P.<$?> ([],P.some a)
- f <|*> a = f P.<|?> ([],P.some a)
-{-
-instance DTC.Sym_DTC Parser where
- positionXML = S.gets state_posXML
- locationTCT = S.gets state_source
--}
-
-
-
-
--- ** Type 'ErrorRead'
-data ErrorRead
- = ErrorRead_EndOfInput
- | ErrorRead_Not_Bool TL.Text
- | ErrorRead_Not_Int TL.Text
- | ErrorRead_Not_Nat Int
- | ErrorRead_Not_Nat1 Int
- | ErrorRead_Not_Rational TL.Text
- | ErrorRead_Not_Positive TL.Text
- {- ErrorRead_Unexpected P.sourcePos XML -}
- deriving (Eq,Ord,Show)
-instance P.ShowErrorComponent ErrorRead where
- showErrorComponent = show
--}
"attribute "<>TL.pack (show $ XML.prefixifyQName ns n)
<>" "<>w ns rm (op,SideR) pairBrace
where op = infixN 10
- try = id
- fail = writeText "fail"
- text = writeText "text"
- any = writeText "any"
- choice [] = writeText "empty"
- choice [w] = w
+ try = id
+ fail = writeText "fail"
+ escapedText = writeText "text"
+ text = writeText "text"
+ any = writeText "any"
+ choice [] = writeText "empty"
+ choice [w] = w
choice l@(_:_) = Writer $ \ns rm po pp ->
pairIfNeeded pp po op $
TL.intercalate " | " $
HM.insert qNameSpace Nothing $ XML.namespaces_prefixes ns}
try = id
fail = mempty
+ escapedText = mempty
text = mempty
any = mempty
choice = mconcat
= NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
| NodeAttr QName -- ^ Node with a 'NodeText' child.
| NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
- | NodeText Text -- ^ Leaf.
+ | NodeText EscapedText -- ^ Leaf.
| NodeComment TL.Text -- ^ Leaf.
| NodeCDATA TL.Text -- ^ Leaf.
deriving (Eq, Ord, Show)
--- ** Type 'Text'
-type Text = [TextLexeme]
+-- ** Type 'EscapedText'
+type EscapedText = [Escaped]
-escapeText :: TL.Text -> Text
+escapeText :: TL.Text -> EscapedText
escapeText s =
case TL.span (`List.notElem` ("<>&'\""::String)) s of
(t, r) | TL.null t -> escape r
- | otherwise -> TextLexemePlain t : escape r
+ | otherwise -> EscapedPlain t : escape r
where
escape t = case TL.uncons t of
Nothing -> []
Just (c, cs) -> escapeChar c : escapeText cs
escapeChar c =
case c of
- '<' -> TextLexemeEntityRef entityRef_lt
- '>' -> TextLexemeEntityRef entityRef_gt
- '&' -> TextLexemeEntityRef entityRef_amp
- '\'' -> TextLexemeEntityRef entityRef_apos
- '"' -> TextLexemeEntityRef entityRef_quot
- _ -> TextLexemePlain $ TL.singleton c
-
-flatText :: Text -> TL.Text
-flatText = foldMap $ \case
- TextLexemePlain t -> t
- TextLexemeEntityRef EntityRef{..} -> entityRef_value
- TextLexemeCharRef (CharRef c) -> TL.singleton c
-
--- *** Type 'TextLexeme'
-data TextLexeme
- = TextLexemePlain TL.Text
- | TextLexemeEntityRef EntityRef
- | TextLexemeCharRef CharRef
+ '<' -> EscapedEntityRef entityRef_lt
+ '>' -> EscapedEntityRef entityRef_gt
+ '&' -> EscapedEntityRef entityRef_amp
+ '\'' -> EscapedEntityRef entityRef_apos
+ '"' -> EscapedEntityRef entityRef_quot
+ _ -> EscapedPlain $ TL.singleton c
+
+unescapeText :: EscapedText -> TL.Text
+unescapeText = foldMap $ \case
+ EscapedPlain t -> t
+ EscapedEntityRef EntityRef{..} -> entityRef_value
+ EscapedCharRef (CharRef c) -> TL.singleton c
+
+-- *** Type 'Escaped'
+-- | 'EscapedText' lexemes.
+data Escaped
+ = EscapedPlain TL.Text
+ | EscapedEntityRef EntityRef
+ | EscapedCharRef CharRef
deriving (Eq, Ord, Show)
-- *** Type 'EntityRef'
<$> P.string "1."
<*> P.takeWhile1P Nothing Char.isDigit
return $ Tree (Sourced c $ NodeAttr "version") $ pure $
- TS.tree0 $ NodeText . pure . TextLexemePlain <$> v
+ TS.tree0 $ NodeText . pure . EscapedPlain <$> v
p_EncodingDecl :: P.Tokens s ~ TL.Text => Parser Error s XML
p_EncodingDecl = P.label "EncodingDecl" $ do
p_Eq
p_quoted $ const $ p_Sourced p_EncName
return $ Tree (Sourced c $ NodeAttr "encoding") $ pure $
- TS.tree0 $ NodeText . pure . TextLexemePlain <$> v
+ TS.tree0 $ NodeText . pure . EscapedPlain <$> v
p_EncName :: P.Tokens s ~ TL.Text => Parser Error s TL.Text
p_EncName = P.label "EncName" $ do
p_Eq
v <- p_quoted $ const $ p_Sourced $ P.string "yes" <|> P.string "no"
return $ Tree (Sourced ca $ NodeAttr "standalone") $ pure $
- TS.tree0 $ NodeText . pure . TextLexemePlain <$> v
+ TS.tree0 $ NodeText . pure . EscapedPlain <$> v
-- ** CharData
-p_CharData :: P.Tokens s ~ TL.Text => Parser e s [TextLexeme]
+p_CharData :: P.Tokens s ~ TL.Text => Parser e s [Escaped]
p_CharData =
escapeText
<$> p_until1 (\c -> XC.isXmlChar c && c/='<' && c/='&') (']',"]>")
elemNS :: HM.HashMap NCName Namespace <-
(HM.fromList . List.concat <$>) $ forM as $ \case
Sourced _ (PName{..}, Sourced _ av)
- | ns <- Namespace $ flatText av
+ | ns <- Namespace $ unescapeText av
, Nothing <- pNameSpace
, NCName "xmlns" <- pNameLocal ->
-- NOTE: default namespace declaration.
|| ns == xmlns_xmlns -- DOC: it MUST NOT be declared as the default namespace
-> p_error $ Error_Namespace_reserved ns
_ -> return [(NCName "" , ns)]
- | ns <- Namespace $ flatText av
+ | ns <- Namespace $ unescapeText av
, Just (NCName "xmlns") <- pNameSpace ->
-- NOTE: namespace prefix declaration.
case unNCName pNameLocal of
| otherwise -> do
ns <- lookupNamePrefix prefix
return QName{qNameSpace=ns, qNameLocal=pNameLocal n}
- elemAttrs :: [Sourced FileSource (QName, Sourced FileSource Text)] <-
+ elemAttrs :: [Sourced FileSource (QName, Sourced FileSource EscapedText)] <-
-- NOTE: expand attributes' PName into QName.
forM as $ \s@Sourced{unSourced=(an, av)} -> do
ns <- maybe (return "") lookupNamePrefix $ pNameSpace an
let qn = QName{qNameSpace=ns, qNameLocal=pNameLocal an}
return s{unSourced=(qn, av)}
-- NOTE: check for attribute collision.
- let attrsByQName :: HM.HashMap QName [Sourced FileSource (QName, Sourced FileSource Text)] =
+ let attrsByQName :: HM.HashMap QName [Sourced FileSource (QName, Sourced FileSource EscapedText)] =
HM.fromListWith (<>) $ (<$> elemAttrs) $ \a@(Sourced _c (an, _av)) -> (an, [a])
case HM.toList $ HM.filter (\x -> length x > 1) attrsByQName of
(an, _):_ -> p_error $ Error_Attribute_collision an
return $ Tree (cell $ NodeElem elemName) content
-- *** Attribute
-p_Attribute :: P.Tokens s ~ TL.Text => Parser Error s (Sourced FileSource (PName, Sourced FileSource Text))
+p_Attribute :: P.Tokens s ~ TL.Text => Parser Error s (Sourced FileSource (PName, Sourced FileSource EscapedText))
p_Attribute = p_Sourced $ (,) <$> p_PName <* p_Eq <*> p_AttValue
-p_AttValue :: P.Tokens s ~ TL.Text => Parser Error s (Sourced FileSource Text)
+p_AttValue :: P.Tokens s ~ TL.Text => Parser Error s (Sourced FileSource EscapedText)
p_AttValue = P.label "AttValue" $ p_quoted p_AttValueText
-p_AttValueText :: P.Tokens s ~ TL.Text => Char -> Parser Error s (Sourced FileSource Text)
+p_AttValueText :: P.Tokens s ~ TL.Text => Char -> Parser Error s (Sourced FileSource EscapedText)
p_AttValueText q = p_Sourced $
P.many
( p_Reference
- <|> TextLexemePlain <$> P.takeWhile1P Nothing (\c ->
+ <|> EscapedPlain <$> P.takeWhile1P Nothing (\c ->
XC.isXmlChar c &&
c `List.notElem` (q:"<&'\">"))
- <|> TextLexemeEntityRef entityRef_gt <$ P.char '>'
+ <|> EscapedEntityRef entityRef_gt <$ P.char '>'
<|> (if q == '\''
- then TextLexemeEntityRef entityRef_quot <$ P.char '"'
- else TextLexemeEntityRef entityRef_apos <$ P.char '\'')
+ then EscapedEntityRef entityRef_quot <$ P.char '"'
+ else EscapedEntityRef entityRef_apos <$ P.char '\'')
)
-- * content
<*> P.takeWhile1P Nothing XC.isXmlNCNameChar
-- * Reference
-p_Reference :: P.Tokens s ~ TL.Text => Parser Error s TextLexeme
+p_Reference :: P.Tokens s ~ TL.Text => Parser Error s Escaped
p_Reference =
- TextLexemeCharRef <$> p_CharRef <|>
- TextLexemeEntityRef <$> p_EntityRef
+ EscapedCharRef <$> p_CharRef <|>
+ EscapedEntityRef <$> p_EntityRef
-- ** EntityRef
p_EntityRef :: P.Tokens s ~ TL.Text => Parser Error s EntityRef
p_S :: P.Tokens s ~ TL.Text => Parser Error s XML
p_S = P.label "Spaces" $
- (\ts -> TS.tree0 (NodeText . pure . TextLexemePlain . TL.concat <$> ts))
+ (\ts -> TS.tree0 (NodeText . pure . EscapedPlain . TL.concat <$> ts))
<$> p_Sourced (P.some $
P.takeWhile1P Nothing XC.isXmlSpaceCharCR <|>
TL.singleton <$> p_CRLF)
build EntityRef{..} = "&"<>build entityRef_name<>";"
instance Buildable CharRef where
build (CharRef c) = "&#"<>build (show (Char.ord c))<>";"
-instance Buildable Text where
+instance Buildable EscapedText where
build = foldMap $ \case
- TextLexemePlain t -> build t
- TextLexemeEntityRef r -> build r
- TextLexemeCharRef r -> build r
+ EscapedPlain t -> build t
+ EscapedEntityRef r -> build r
+ EscapedCharRef r -> build r
-- * Class 'Writable'
class Writeable a where
(`all` xs) $ \case
Tree (Sourced _ (NodeText txt)) _ts ->
all (\case
- TextLexemePlain t -> TL.all Char.isSpace t
+ EscapedPlain t -> TL.all Char.isSpace t
_ -> False) txt
_ -> True
instance Writeable XML where
-- xmlns:prefix="namespace"
| qNameSpace == xmlns_xmlns
, [Tree (Sourced _ (NodeText t)) _] <- toList vs ->
- let n = flatText t in
+ let n = unescapeText t in
(uNS,) dNS
{ namespaces_prefixes =
(if TL.null n
, qNameLocal == NCName "xmlns"
, [Tree (Sourced _ (NodeText t)) _] <- toList vs ->
(uNS,)
- dNS{namespaces_default = Namespace $ flatText t}
+ dNS{namespaces_default = Namespace $ unescapeText t}
-- name="value"
| qNameSpace == xmlns_empty -> (uNS, dNS)
-- {namespace}name="value"
(\acc (Namespace v) p ->
(acc Seq.|>) $
Tree (notSourced $ NodeAttr QName{qNameSpace=xmlns_xmlns, qNameLocal=p}) $
- pure $ tree0 $ notSourced $ NodeText $ pure $ TextLexemePlain v
+ pure $ tree0 $ notSourced $ NodeText $ pure $ EscapedPlain v
) mempty autoNS
let scopeNS = declNS { namespaces_prefixes = autoNS <> inhNS }
return $
then mempty
else build t
-buildAttr :: PName -> Text -> TLB.Builder
+buildAttr :: PName -> EscapedText -> TLB.Builder
buildAttr n v = build n<>"=\""<>buildAttrValue v<>"\""
-buildAttrValue :: Text -> TLB.Builder
+buildAttrValue :: EscapedText -> TLB.Builder
buildAttrValue = foldMap $ \case
- TextLexemePlain p -> build p
- TextLexemeEntityRef EntityRef{..} ->
+ EscapedPlain p -> build p
+ EscapedEntityRef EntityRef{..} ->
build $ TL.replace "\"" """ entityRef_value
- TextLexemeCharRef (CharRef c)
+ EscapedCharRef (CharRef c)
| c == '\"' -> """
| otherwise -> build c
if (`all` xs) $ \case
Tree (Sourced _ (NodeText txt)) _ts ->
all (\case
- TextLexemePlain t -> TL.all Char.isSpace t
+ EscapedPlain t -> TL.all Char.isSpace t
_ -> False) txt
_ -> True
then (`Seq.filter` xs) $ \case
-- PVP: +-+------- breaking API changes
-- | | +----- non-breaking API additions
-- | | | +--- code changes with no API change
-version: 0.0.0.20181024
+version: 0.0.0.20181221
category: Data Structures
synopsis: Library for reading, validating and writing a subset of the XML format.
description: Symantics for an approximative implementation