1 {-# LANGUAGE DeriveFunctor #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE NoMonomorphismRestriction #-}
5 {-# LANGUAGE OverloadedStrings #-}
6 {-# LANGUAGE StandaloneDeriving #-}
7 {-# OPTIONS_GHC -fno-warn-tabs #-}
8 module Language.Symantic.Parsing.EBNF where
10 import Control.Applicative (Applicative(..))
12 import Data.Bool as Bool
13 import Data.Char as Char
14 import qualified Data.List as List
15 import Data.Semigroup hiding (option)
16 import Data.String (IsString(..))
17 import Data.Text (Text)
18 import qualified Data.Text as Text
19 import Prelude hiding (any)
21 import Language.Symantic.Parsing.Grammar
24 -- | Extended Backus-Naur-Form, following the
25 -- <http://standards.iso.org/ittf/PubliclyAvailableStandards/s026153_ISO_IEC_14977_1996(E).zip ISO-IEC-14977>
26 -- notations, augmented with the following notations:
28 -- * @("U+", code_point)@: for <http://unicode.org/versions/Unicode8.0.0/ ISO-IEC-10646> (aka. Unicode).
29 -- * @(char, "…", char)@: for character range.
30 -- * @(rule, "&", rule)@: for the intersection.
31 -- * @(rule, "-", rule)@: for the difference.
32 -- * @(rule, " ", rule)@: for rule application.
33 data EBNF a = EBNF { unEBNF :: RuleMode -> (Infix, LR) -> Text }
35 runEBNF :: EBNF a -> Text
36 runEBNF (EBNF g) = g RuleMode_Body (infixN0, L)
38 -- | Get textual rendition of given EBNF rule.
39 renderEBNF :: RuleDef a -> Text
40 renderEBNF = runEBNF . unRuleDef
42 ebnf_const :: Text -> EBNF a
43 ebnf_const t = EBNF $ \_rm _op -> t
47 = RuleMode_Body -- ^ Generate the body of the rule.
48 | RuleMode_Ref -- ^ Generate a ref to the rule.
52 newtype RuleDef a = RuleDef { unRuleDef :: EBNF a }
53 deriving (Functor, Gram_Terminal, Applicative, App)
54 deriving instance Alter RuleDef
55 deriving instance Alt RuleDef
56 deriving instance Gram_RegL RuleDef
57 deriving instance Gram_RegR RuleDef
58 deriving instance Gram_CF RuleDef
59 deriving instance Gram_RuleDef RuleDef
60 deriving instance Gram_RuleDef g => Gram_RuleDef (RegR g)
61 deriving instance Gram_RuleDef g => Gram_RuleDef (RegL g)
62 deriving instance Gram_RuleDef g => Gram_RuleDef (CF g)
63 instance Gram_Rule RuleDef where
64 rule n = rule_def (ebnf_const n)
65 rule1 n g a = rule_def (ebnf_const n `ebnf_arg` unRuleDef a) (g a)
66 rule2 n g a b = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b) (g a b)
67 rule3 n g a b c = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c) (g a b c)
68 rule4 n g a b c d = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c `ebnf_arg` unRuleDef d) (g a b c d)
69 instance Gram_Meta meta RuleDef where
70 metaG (RuleDef x) = RuleDef $ metaG x
71 instance Gram_Lexer RuleDef
73 -- *** Class 'Gram_RuleDef'
74 class Gram_RuleDef g where
75 rule_def :: EBNF () -> g a -> RuleDef a
76 rule_arg :: Text -> g a
78 -- | Helper for 'Gram_Rule' 'EBNF'.
79 ebnf_arg :: EBNF a -> EBNF b -> EBNF ()
80 ebnf_arg (EBNF a) (EBNF b) = EBNF $ \bo po -> infix_paren po op $
81 a bo (op, L) <> " " <> b bo (op, R)
85 instance Gram_RuleDef EBNF where
88 RuleDef $ EBNF $ \mo po ->
90 RuleMode_Ref -> unEBNF call mo po
92 Text.intercalate " " $
93 [ unEBNF call RuleMode_Ref (infixN0, L)
95 , unEBNF body RuleMode_Ref (infixN0, R)
98 instance IsString (EBNF String) where
100 instance Show (EBNF a) where
101 show = Text.unpack . runEBNF
102 instance Gram_Rule EBNF where
103 rule n g = EBNF $ \rm po ->
105 RuleMode_Body -> unEBNF g RuleMode_Ref po
107 rule1 n g a = EBNF $ \rm po ->
109 RuleMode_Body -> unEBNF (g a) RuleMode_Ref po
110 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a) RuleMode_Ref po
111 rule2 n g a b = EBNF $ \rm po ->
113 RuleMode_Body -> unEBNF (g a b) RuleMode_Ref po
114 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b) RuleMode_Ref po
115 rule3 n g a b c = EBNF $ \rm po ->
117 RuleMode_Body -> unEBNF (g a b c) RuleMode_Ref po
118 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c) RuleMode_Ref po
119 rule4 n g a b c d = EBNF $ \rm po ->
121 RuleMode_Body -> unEBNF (g a b c d) RuleMode_Ref po
122 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c `ebnf_arg` d) RuleMode_Ref po
123 instance Functor EBNF where
124 fmap _f (EBNF x) = EBNF x
125 instance Applicative EBNF where
127 EBNF f <*> EBNF x = EBNF $ \bo po -> infix_paren po op $
128 f bo (op, L) <> ", " <> x bo (op, R)
129 where op = infixB L 10
131 instance Alter EBNF where
132 empty = ebnf_const $ "\"\""
133 EBNF g <+> EBNF q = EBNF $ \bo po -> infix_paren po op $
134 g bo (op, L) <> " | " <> q bo (op, R)
135 where op = infixB L 2
138 choice l@(_:_) = EBNF $ \bo po -> infix_paren po op $
139 Text.intercalate " | " $
140 (unEBNF <$> l) <*> pure bo <*> pure (op, L)
141 where op = infixB L 2
142 instance Alt EBNF where
143 many (EBNF g) = EBNF $ \rm _po -> "{" <> g rm (op, L) <> "}" where op = infixN0
144 some (EBNF g) = EBNF $ \rm _po -> "{" <> g rm (op, L) <> "}-" where op = infixN0
145 option _x (EBNF g) = EBNF $ \rm _po ->
146 "[" <> g rm (op, L) <> "]" where op = infixN0
147 instance Gram_Terminal EBNF where
149 Terminal (EBNF f) `but` Terminal (EBNF g) =
150 Terminal $ EBNF $ \bo po -> infix_paren po op $
151 f bo (op, L) <> " - " <> g bo (op, R)
153 eoi = ebnf_const "EOF"
154 char = ebnf_const . escape
156 escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""]
157 escape c = Text.concat ["U+", Text.pack $ show $ ord c]
159 case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of
161 ("", [c]) -> "" <$ char c
162 (ps, [c]) -> "" <$ raw ps <* char c
163 ("", c:rs) -> "" <$ char c <* string rs
164 (ps, c:rs) -> "" <$ raw ps <* char c <* string rs
166 raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""]
167 unicat = ebnf_const . Text.pack . show
168 range (l, h) = ebnf_const $ Text.concat
173 instance Gram_RegR EBNF where
174 Terminal f .*> Reg x = Reg $ f <*> x
175 manyR = Reg . many . unTerminal
176 someR = Reg . some . unTerminal
177 instance Gram_RegL EBNF where
178 Reg f <*. Terminal x = Reg $ f <*> x
179 manyL = Reg . many . unTerminal
180 someL = Reg . some . unTerminal
181 instance Gram_CF EBNF where
182 CF (EBNF f) <& Reg (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $
183 f bo (op, L) <> " & " <> g bo (op, R)
184 where op = infixB L 4
185 Reg (EBNF f) &> CF (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $
186 f bo (op, L) <> " & " <> g bo (op, R)
187 where op = infixB L 4
188 CF (EBNF f) `minus` Reg (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $
189 f bo (op, L) <> " - " <> g bo (op, R)
191 instance Gram_Meta meta EBNF where
192 metaG (EBNF x) = EBNF x
193 instance Gram_Lexer EBNF
195 gram_lexer :: forall g . (Gram_Lexer g, Gram_RuleDef g) => [CF g ()]
197 [ void $ commentable (void $ rule_arg "space") (void $ rule_arg "line") (void $ rule_arg "block")
198 , void $ comment_line (rule_arg "prefix")
199 , void $ comment_block (rule_arg "start") (rule_arg "end" :: RegL g String)
200 , void $ lexeme (rule_arg "g")
201 , void $ parens (rule_arg "g")
202 , void $ operators (rule_arg "expr") (rule_arg "prefix") (rule_arg "infix") (rule_arg "postfix")
203 , void $ inside id (rule_arg "begin") (rule_arg "i") (rule_arg "end") (rule_arg "next")