{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE NoMonomorphismRestriction #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE StandaloneDeriving #-} {-# OPTIONS_GHC -fno-warn-tabs #-} module Language.Symantic.Parsing.EBNF where import Control.Applicative (Applicative(..), Alternative(..)) import Control.Monad import Data.Bool as Bool import Data.Char as Char import qualified Data.List as List import Data.Semigroup hiding (option) import Data.String (IsString(..)) import Data.Text (Text) import qualified Data.Text as Text import Prelude hiding (any) import Language.Symantic.Parsing.Grammar -- * Type 'EBNF' -- | Extended Backus-Naur-Form, following the -- -- notations, augmented with the following notations: -- -- * @("U+", code_point)@: for (aka. Unicode). -- * @(char, "…", char)@: for character range. -- * @(rule, "&", rule)@: for the intersection. -- * @(rule, "-", rule)@: for the difference. -- * @(rule, " ", rule)@: for rule application. data EBNF a = EBNF { unEBNF :: RuleMode -> (Infix, LR) -> Text } runEBNF :: EBNF a -> Text runEBNF (EBNF g) = g RuleMode_Body (infixN0, L) -- | Get textual rendition of given EBNF rule. renderEBNF :: RuleDef a -> Text renderEBNF = runEBNF . unRuleDef ebnf_const :: Text -> EBNF a ebnf_const t = EBNF $ \_rm _op -> t -- ** Type 'RuleMode' data RuleMode = RuleMode_Body -- ^ Generate the body of the rule. | RuleMode_Ref -- ^ Generate a ref to the rule. deriving (Eq, Show) -- ** Type 'RuleDef' newtype RuleDef a = RuleDef { unRuleDef :: EBNF a } deriving (Functor, Gram_Terminal, Applicative, App , Alternative, Alter, Alt, Gram_RegL, Gram_RegR, Gram_CF) deriving instance Gram_RuleDef RuleDef deriving instance Gram_RuleDef g => Gram_RuleDef (RegR g) deriving instance Gram_RuleDef g => Gram_RuleDef (RegL g) deriving instance Gram_RuleDef g => Gram_RuleDef (CF g) instance Gram_Rule RuleDef where rule n = rule_def (ebnf_const n) rule1 n g a = rule_def (ebnf_const n `ebnf_arg` unRuleDef a) (g a) rule2 n g a b = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b) (g a b) rule3 n g a b c = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c) (g a b c) rule4 n g a b c d = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c `ebnf_arg` unRuleDef d) (g a b c d) instance Gram_Meta meta RuleDef where metaG (RuleDef x) = RuleDef $ metaG x instance Gram_Lexer RuleDef -- *** Class 'Gram_RuleDef' class Gram_RuleDef g where rule_def :: EBNF () -> g a -> RuleDef a rule_arg :: Text -> g a -- | Helper for 'Gram_Rule' 'EBNF'. ebnf_arg :: EBNF a -> EBNF b -> EBNF () ebnf_arg (EBNF a) (EBNF b) = EBNF $ \bo po -> infix_paren po op $ a bo (op, L) <> " " <> b bo (op, R) where op = infixL 11 infixl 5 `ebnf_arg` instance Gram_RuleDef EBNF where rule_arg = ebnf_const rule_def call body = RuleDef $ EBNF $ \mo po -> case mo of RuleMode_Ref -> unEBNF call mo po RuleMode_Body -> Text.intercalate " " $ [ unEBNF call RuleMode_Ref (infixN0, L) , "=" , unEBNF body RuleMode_Ref (infixN0, R) , ";" ] instance IsString (EBNF String) where fromString = string instance Show (EBNF a) where show = Text.unpack . runEBNF instance Gram_Rule EBNF where rule n g = EBNF $ \rm po -> case rm of RuleMode_Body -> unEBNF g RuleMode_Ref po RuleMode_Ref -> n rule1 n g a = EBNF $ \rm po -> case rm of RuleMode_Body -> unEBNF (g a) RuleMode_Ref po RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a) RuleMode_Ref po rule2 n g a b = EBNF $ \rm po -> case rm of RuleMode_Body -> unEBNF (g a b) RuleMode_Ref po RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b) RuleMode_Ref po rule3 n g a b c = EBNF $ \rm po -> case rm of RuleMode_Body -> unEBNF (g a b c) RuleMode_Ref po RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c) RuleMode_Ref po rule4 n g a b c d = EBNF $ \rm po -> case rm of RuleMode_Body -> unEBNF (g a b c d) RuleMode_Ref po RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c `ebnf_arg` d) RuleMode_Ref po instance Functor EBNF where fmap _f (EBNF x) = EBNF x instance Applicative EBNF where pure _ = empty EBNF f <*> EBNF x = EBNF $ \bo po -> infix_paren po op $ f bo (op, L) <> ", " <> x bo (op, R) where op = infixB L 10 instance App EBNF instance Alternative EBNF where empty = ebnf_const $ "\"\"" EBNF g <|> EBNF q = EBNF $ \bo po -> infix_paren po op $ g bo (op, L) <> " | " <> q bo (op, R) where op = infixB L 2 many (EBNF g) = EBNF $ \rm _po -> "{" <> g rm (op, L) <> "}" where op = infixN0 some (EBNF g) = EBNF $ \rm _po -> "{" <> g rm (op, L) <> "}-" where op = infixN0 instance Alter EBNF where nil = ebnf_const $ "\"\"" choice [] = empty choice [g] = g choice l@(_:_) = EBNF $ \bo po -> infix_paren po op $ Text.intercalate " | " $ (unEBNF <$> l) <*> pure bo <*> pure (op, L) where op = infixB L 2 star (EBNF g) = EBNF $ \rm _po -> "{" <> g rm (op, L) <> "}" where op = infixN0 instance Alt EBNF where option _x (EBNF g) = EBNF $ \rm _po -> "[" <> g rm (op, L) <> "]" where op = infixN0 instance Gram_Terminal EBNF where any = ebnf_const "_" Terminal (EBNF f) `but` Terminal (EBNF g) = Terminal $ EBNF $ \bo po -> infix_paren po op $ f bo (op, L) <> " - " <> g bo (op, R) where op = infixL 6 eoi = ebnf_const "EOF" char = ebnf_const . escape where escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""] escape c = Text.concat ["U+", Text.pack $ show $ ord c] string s = case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of (ps, "") -> raw ps ("", [c]) -> "" <$ char c (ps, [c]) -> "" <$ raw ps <* char c ("", c:rs) -> "" <$ char c <* string rs (ps, c:rs) -> "" <$ raw ps <* char c <* string rs where raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""] unicat = ebnf_const . Text.pack . show range (l, h) = ebnf_const $ Text.concat [ runEBNF $ char l , "…" , runEBNF $ char h ] instance Gram_RegR EBNF where Terminal f .*> Reg x = Reg $ f <*> x manyR = Reg . many . unTerminal someR = Reg . some . unTerminal instance Gram_RegL EBNF where Reg f <*. Terminal x = Reg $ f <*> x manyL = Reg . many . unTerminal someL = Reg . some . unTerminal instance Gram_CF EBNF where CF (EBNF f) <& Reg (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $ f bo (op, L) <> " & " <> g bo (op, R) where op = infixB L 4 Reg (EBNF f) &> CF (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $ f bo (op, L) <> " & " <> g bo (op, R) where op = infixB L 4 CF (EBNF f) `minus` Reg (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $ f bo (op, L) <> " - " <> g bo (op, R) where op = infixL 6 instance Gram_Meta meta EBNF where metaG (EBNF x) = EBNF x instance Gram_Lexer EBNF gram_lexer :: forall g . (Gram_Lexer g, Gram_RuleDef g) => [CF g ()] gram_lexer = [ void $ commentable (void $ rule_arg "space") (void $ rule_arg "line") (void $ rule_arg "block") , void $ comment_line (rule_arg "prefix") , void $ comment_block (rule_arg "start") (rule_arg "end" :: RegL g String) , void $ lexeme (rule_arg "g") , void $ parens (rule_arg "g") , void $ operators (rule_arg "expr") (rule_arg "prefix") (rule_arg "infix") (rule_arg "postfix") , void $ inside id (rule_arg "begin") (rule_arg "i") (rule_arg "end") (rule_arg "next") ]