]> Git — Sourcephile - haskell/symantic.git/blob - Language/Symantic/Parsing/EBNF.hs
Remove Alternative uses in grammars.
[haskell/symantic.git] / Language / Symantic / Parsing / EBNF.hs
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE NoMonomorphismRestriction #-}
5 {-# LANGUAGE OverloadedStrings #-}
6 {-# LANGUAGE StandaloneDeriving #-}
7 {-# OPTIONS_GHC -fno-warn-tabs #-}
8 module Language.Symantic.Parsing.EBNF where
9
10 import Control.Applicative (Applicative(..))
11 import Control.Monad
12 import Data.Bool as Bool
13 import Data.Char as Char
14 import qualified Data.List as List
15 import Data.Semigroup hiding (option)
16 import Data.String (IsString(..))
17 import Data.Text (Text)
18 import qualified Data.Text as Text
19 import Prelude hiding (any)
20
21 import Language.Symantic.Parsing.Grammar
22
23 -- * Type 'EBNF'
24 -- | Extended Backus-Naur-Form, following the
25 -- <http://standards.iso.org/ittf/PubliclyAvailableStandards/s026153_ISO_IEC_14977_1996(E).zip ISO-IEC-14977>
26 -- notations, augmented with the following notations:
27 --
28 -- * @("U+", code_point)@: for <http://unicode.org/versions/Unicode8.0.0/ ISO-IEC-10646> (aka. Unicode).
29 -- * @(char, "…", char)@: for character range.
30 -- * @(rule, "&", rule)@: for the intersection.
31 -- * @(rule, "-", rule)@: for the difference.
32 -- * @(rule, " ", rule)@: for rule application.
33 data EBNF a = EBNF { unEBNF :: RuleMode -> (Infix, LR) -> Text }
34
35 runEBNF :: EBNF a -> Text
36 runEBNF (EBNF g) = g RuleMode_Body (infixN0, L)
37
38 -- | Get textual rendition of given EBNF rule.
39 renderEBNF :: RuleDef a -> Text
40 renderEBNF = runEBNF . unRuleDef
41
42 ebnf_const :: Text -> EBNF a
43 ebnf_const t = EBNF $ \_rm _op -> t
44
45 -- ** Type 'RuleMode'
46 data RuleMode
47 = RuleMode_Body -- ^ Generate the body of the rule.
48 | RuleMode_Ref -- ^ Generate a ref to the rule.
49 deriving (Eq, Show)
50
51 -- ** Type 'RuleDef'
52 newtype RuleDef a = RuleDef { unRuleDef :: EBNF a }
53 deriving (Functor, Gram_Terminal, Applicative, App)
54 deriving instance Alter RuleDef
55 deriving instance Alt RuleDef
56 deriving instance Gram_RegL RuleDef
57 deriving instance Gram_RegR RuleDef
58 deriving instance Gram_CF RuleDef
59 deriving instance Gram_RuleDef RuleDef
60 deriving instance Gram_RuleDef g => Gram_RuleDef (RegR g)
61 deriving instance Gram_RuleDef g => Gram_RuleDef (RegL g)
62 deriving instance Gram_RuleDef g => Gram_RuleDef (CF g)
63 instance Gram_Rule RuleDef where
64 rule n = rule_def (ebnf_const n)
65 rule1 n g a = rule_def (ebnf_const n `ebnf_arg` unRuleDef a) (g a)
66 rule2 n g a b = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b) (g a b)
67 rule3 n g a b c = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c) (g a b c)
68 rule4 n g a b c d = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c `ebnf_arg` unRuleDef d) (g a b c d)
69 instance Gram_Meta meta RuleDef where
70 metaG (RuleDef x) = RuleDef $ metaG x
71 instance Gram_Lexer RuleDef
72
73 -- *** Class 'Gram_RuleDef'
74 class Gram_RuleDef g where
75 rule_def :: EBNF () -> g a -> RuleDef a
76 rule_arg :: Text -> g a
77
78 -- | Helper for 'Gram_Rule' 'EBNF'.
79 ebnf_arg :: EBNF a -> EBNF b -> EBNF ()
80 ebnf_arg (EBNF a) (EBNF b) = EBNF $ \bo po -> infix_paren po op $
81 a bo (op, L) <> " " <> b bo (op, R)
82 where op = infixL 11
83 infixl 5 `ebnf_arg`
84
85 instance Gram_RuleDef EBNF where
86 rule_arg = ebnf_const
87 rule_def call body =
88 RuleDef $ EBNF $ \mo po ->
89 case mo of
90 RuleMode_Ref -> unEBNF call mo po
91 RuleMode_Body ->
92 Text.intercalate " " $
93 [ unEBNF call RuleMode_Ref (infixN0, L)
94 , "="
95 , unEBNF body RuleMode_Ref (infixN0, R)
96 , ";"
97 ]
98 instance IsString (EBNF String) where
99 fromString = string
100 instance Show (EBNF a) where
101 show = Text.unpack . runEBNF
102 instance Gram_Rule EBNF where
103 rule n g = EBNF $ \rm po ->
104 case rm of
105 RuleMode_Body -> unEBNF g RuleMode_Ref po
106 RuleMode_Ref -> n
107 rule1 n g a = EBNF $ \rm po ->
108 case rm of
109 RuleMode_Body -> unEBNF (g a) RuleMode_Ref po
110 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a) RuleMode_Ref po
111 rule2 n g a b = EBNF $ \rm po ->
112 case rm of
113 RuleMode_Body -> unEBNF (g a b) RuleMode_Ref po
114 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b) RuleMode_Ref po
115 rule3 n g a b c = EBNF $ \rm po ->
116 case rm of
117 RuleMode_Body -> unEBNF (g a b c) RuleMode_Ref po
118 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c) RuleMode_Ref po
119 rule4 n g a b c d = EBNF $ \rm po ->
120 case rm of
121 RuleMode_Body -> unEBNF (g a b c d) RuleMode_Ref po
122 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c `ebnf_arg` d) RuleMode_Ref po
123 instance Functor EBNF where
124 fmap _f (EBNF x) = EBNF x
125 instance Applicative EBNF where
126 pure _ = empty
127 EBNF f <*> EBNF x = EBNF $ \bo po -> infix_paren po op $
128 f bo (op, L) <> ", " <> x bo (op, R)
129 where op = infixB L 10
130 instance App EBNF
131 instance Alter EBNF where
132 empty = ebnf_const $ "\"\""
133 EBNF g <+> EBNF q = EBNF $ \bo po -> infix_paren po op $
134 g bo (op, L) <> " | " <> q bo (op, R)
135 where op = infixB L 2
136 choice [] = empty
137 choice [g] = g
138 choice l@(_:_) = EBNF $ \bo po -> infix_paren po op $
139 Text.intercalate " | " $
140 (unEBNF <$> l) <*> pure bo <*> pure (op, L)
141 where op = infixB L 2
142 instance Alt EBNF where
143 many (EBNF g) = EBNF $ \rm _po -> "{" <> g rm (op, L) <> "}" where op = infixN0
144 some (EBNF g) = EBNF $ \rm _po -> "{" <> g rm (op, L) <> "}-" where op = infixN0
145 option _x (EBNF g) = EBNF $ \rm _po ->
146 "[" <> g rm (op, L) <> "]" where op = infixN0
147 instance Gram_Terminal EBNF where
148 any = ebnf_const "_"
149 Terminal (EBNF f) `but` Terminal (EBNF g) =
150 Terminal $ EBNF $ \bo po -> infix_paren po op $
151 f bo (op, L) <> " - " <> g bo (op, R)
152 where op = infixL 6
153 eoi = ebnf_const "EOF"
154 char = ebnf_const . escape
155 where
156 escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""]
157 escape c = Text.concat ["U+", Text.pack $ show $ ord c]
158 string s =
159 case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of
160 (ps, "") -> raw ps
161 ("", [c]) -> "" <$ char c
162 (ps, [c]) -> "" <$ raw ps <* char c
163 ("", c:rs) -> "" <$ char c <* string rs
164 (ps, c:rs) -> "" <$ raw ps <* char c <* string rs
165 where
166 raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""]
167 unicat = ebnf_const . Text.pack . show
168 range (l, h) = ebnf_const $ Text.concat
169 [ runEBNF $ char l
170 , "…"
171 , runEBNF $ char h
172 ]
173 instance Gram_RegR EBNF where
174 Terminal f .*> Reg x = Reg $ f <*> x
175 manyR = Reg . many . unTerminal
176 someR = Reg . some . unTerminal
177 instance Gram_RegL EBNF where
178 Reg f <*. Terminal x = Reg $ f <*> x
179 manyL = Reg . many . unTerminal
180 someL = Reg . some . unTerminal
181 instance Gram_CF EBNF where
182 CF (EBNF f) <& Reg (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $
183 f bo (op, L) <> " & " <> g bo (op, R)
184 where op = infixB L 4
185 Reg (EBNF f) &> CF (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $
186 f bo (op, L) <> " & " <> g bo (op, R)
187 where op = infixB L 4
188 CF (EBNF f) `minus` Reg (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $
189 f bo (op, L) <> " - " <> g bo (op, R)
190 where op = infixL 6
191 instance Gram_Meta meta EBNF where
192 metaG (EBNF x) = EBNF x
193 instance Gram_Lexer EBNF
194
195 gram_lexer :: forall g . (Gram_Lexer g, Gram_RuleDef g) => [CF g ()]
196 gram_lexer =
197 [ void $ commentable (void $ rule_arg "space") (void $ rule_arg "line") (void $ rule_arg "block")
198 , void $ comment_line (rule_arg "prefix")
199 , void $ comment_block (rule_arg "start") (rule_arg "end" :: RegL g String)
200 , void $ lexeme (rule_arg "g")
201 , void $ parens (rule_arg "g")
202 , void $ operators (rule_arg "expr") (rule_arg "prefix") (rule_arg "infix") (rule_arg "postfix")
203 , void $ inside id (rule_arg "begin") (rule_arg "i") (rule_arg "end") (rule_arg "next")
204 ]