]> Git — Sourcephile - haskell/symantic.git/blob - Language/Symantic/Parsing/EBNF.hs
Move libraries in Lib.
[haskell/symantic.git] / Language / Symantic / Parsing / EBNF.hs
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE NoMonomorphismRestriction #-}
5 {-# LANGUAGE OverloadedStrings #-}
6 {-# LANGUAGE StandaloneDeriving #-}
7 {-# OPTIONS_GHC -fno-warn-tabs #-}
8 module Language.Symantic.Parsing.EBNF where
9
10 import Control.Applicative (Applicative(..), Alternative(..))
11 import Control.Monad
12 import Data.Bool as Bool
13 import Data.Char as Char
14 import qualified Data.List as List
15 import Data.Semigroup hiding (option)
16 import Data.String (IsString(..))
17 import Data.Text (Text)
18 import qualified Data.Text as Text
19 import Prelude hiding (any)
20
21 import Language.Symantic.Parsing.Grammar
22
23 -- * Type 'EBNF'
24 -- | Extended Backus-Naur-Form, following the
25 -- <http://standards.iso.org/ittf/PubliclyAvailableStandards/s026153_ISO_IEC_14977_1996(E).zip ISO-IEC-14977>
26 -- notations, augmented with the following notations:
27 --
28 -- * @("U+", code_point)@: for <http://unicode.org/versions/Unicode8.0.0/ ISO-IEC-10646> (aka. Unicode).
29 -- * @(char, "…", char)@: for character range.
30 -- * @(rule, "&", rule)@: for the intersection.
31 -- * @(rule, "-", rule)@: for the difference.
32 -- * @(rule, " ", rule)@: for rule application.
33 data EBNF a = EBNF { unEBNF :: RuleMode -> (Infix, LR) -> Text }
34
35 runEBNF :: EBNF a -> Text
36 runEBNF (EBNF g) = g RuleMode_Body (infixN0, L)
37
38 -- | Get textual rendition of given EBNF rule.
39 renderEBNF :: RuleDef a -> Text
40 renderEBNF = runEBNF . unRuleDef
41
42 ebnf_const :: Text -> EBNF a
43 ebnf_const t = EBNF $ \_rm _op -> t
44
45 -- ** Type 'RuleMode'
46 data RuleMode
47 = RuleMode_Body -- ^ Generate the body of the rule.
48 | RuleMode_Ref -- ^ Generate a ref to the rule.
49 deriving (Eq, Show)
50
51 -- ** Type 'RuleDef'
52 newtype RuleDef a = RuleDef { unRuleDef :: EBNF a }
53 deriving (Functor, Gram_Terminal, Applicative, App
54 , Alternative, Alter, Alt, Gram_RegL, Gram_RegR, Gram_CF)
55 deriving instance Gram_RuleDef RuleDef
56 deriving instance Gram_RuleDef g => Gram_RuleDef (RegR g)
57 deriving instance Gram_RuleDef g => Gram_RuleDef (RegL g)
58 deriving instance Gram_RuleDef g => Gram_RuleDef (CF g)
59 instance Gram_Rule RuleDef where
60 rule n = rule_def (ebnf_const n)
61 rule1 n g a = rule_def (ebnf_const n `ebnf_arg` unRuleDef a) (g a)
62 rule2 n g a b = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b) (g a b)
63 rule3 n g a b c = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c) (g a b c)
64 rule4 n g a b c d = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c `ebnf_arg` unRuleDef d) (g a b c d)
65 instance Gram_Meta meta RuleDef where
66 metaG (RuleDef x) = RuleDef $ metaG x
67 instance Gram_Lexer RuleDef
68
69 -- *** Class 'Gram_RuleDef'
70 class Gram_RuleDef g where
71 rule_def :: EBNF () -> g a -> RuleDef a
72 rule_arg :: Text -> g a
73
74 -- | Helper for 'Gram_Rule' 'EBNF'.
75 ebnf_arg :: EBNF a -> EBNF b -> EBNF ()
76 ebnf_arg (EBNF a) (EBNF b) = EBNF $ \bo po -> infix_paren po op $
77 a bo (op, L) <> " " <> b bo (op, R)
78 where op = infixL 11
79 infixl 5 `ebnf_arg`
80
81 instance Gram_RuleDef EBNF where
82 rule_arg = ebnf_const
83 rule_def call body =
84 RuleDef $ EBNF $ \mo po ->
85 case mo of
86 RuleMode_Ref -> unEBNF call mo po
87 RuleMode_Body ->
88 Text.intercalate " " $
89 [ unEBNF call RuleMode_Ref (infixN0, L)
90 , "="
91 , unEBNF body RuleMode_Ref (infixN0, R)
92 , ";"
93 ]
94 instance IsString (EBNF String) where
95 fromString = string
96 instance Show (EBNF a) where
97 show = Text.unpack . runEBNF
98 instance Gram_Rule EBNF where
99 rule n g = EBNF $ \rm po ->
100 case rm of
101 RuleMode_Body -> unEBNF g RuleMode_Ref po
102 RuleMode_Ref -> n
103 rule1 n g a = EBNF $ \rm po ->
104 case rm of
105 RuleMode_Body -> unEBNF (g a) RuleMode_Ref po
106 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a) RuleMode_Ref po
107 rule2 n g a b = EBNF $ \rm po ->
108 case rm of
109 RuleMode_Body -> unEBNF (g a b) RuleMode_Ref po
110 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b) RuleMode_Ref po
111 rule3 n g a b c = EBNF $ \rm po ->
112 case rm of
113 RuleMode_Body -> unEBNF (g a b c) RuleMode_Ref po
114 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c) RuleMode_Ref po
115 rule4 n g a b c d = EBNF $ \rm po ->
116 case rm of
117 RuleMode_Body -> unEBNF (g a b c d) RuleMode_Ref po
118 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c `ebnf_arg` d) RuleMode_Ref po
119 instance Functor EBNF where
120 fmap _f (EBNF x) = EBNF x
121 instance Applicative EBNF where
122 pure _ = empty
123 EBNF f <*> EBNF x = EBNF $ \bo po -> infix_paren po op $
124 f bo (op, L) <> ", " <> x bo (op, R)
125 where op = infixB L 10
126 instance App EBNF
127 instance Alternative EBNF where
128 empty = ebnf_const $ "\"\""
129 EBNF g <|> EBNF q = EBNF $ \bo po -> infix_paren po op $
130 g bo (op, L) <> " | " <> q bo (op, R)
131 where op = infixB L 2
132 many (EBNF g) = EBNF $ \rm _po -> "{" <> g rm (op, L) <> "}" where op = infixN0
133 some (EBNF g) = EBNF $ \rm _po -> "{" <> g rm (op, L) <> "}-" where op = infixN0
134 instance Alter EBNF where
135 nil = ebnf_const $ "\"\""
136 choice [] = empty
137 choice [g] = g
138 choice l@(_:_) = EBNF $ \bo po -> infix_paren po op $
139 Text.intercalate " | " $
140 (unEBNF <$> l) <*> pure bo <*> pure (op, L)
141 where op = infixB L 2
142 star (EBNF g) = EBNF $ \rm _po -> "{" <> g rm (op, L) <> "}" where op = infixN0
143 instance Alt EBNF where
144 option _x (EBNF g) = EBNF $ \rm _po ->
145 "[" <> g rm (op, L) <> "]" where op = infixN0
146 instance Gram_Terminal EBNF where
147 any = ebnf_const "_"
148 Terminal (EBNF f) `but` Terminal (EBNF g) =
149 Terminal $ EBNF $ \bo po -> infix_paren po op $
150 f bo (op, L) <> " - " <> g bo (op, R)
151 where op = infixL 6
152 eoi = ebnf_const "EOF"
153 char = ebnf_const . escape
154 where
155 escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""]
156 escape c = Text.concat ["U+", Text.pack $ show $ ord c]
157 string s =
158 case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of
159 (ps, "") -> raw ps
160 ("", [c]) -> "" <$ char c
161 (ps, [c]) -> "" <$ raw ps <* char c
162 ("", c:rs) -> "" <$ char c <* string rs
163 (ps, c:rs) -> "" <$ raw ps <* char c <* string rs
164 where
165 raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""]
166 unicat = ebnf_const . Text.pack . show
167 range (l, h) = ebnf_const $ Text.concat
168 [ runEBNF $ char l
169 , "…"
170 , runEBNF $ char h
171 ]
172 instance Gram_RegR EBNF where
173 Terminal f .*> Reg x = Reg $ f <*> x
174 manyR = Reg . many . unTerminal
175 someR = Reg . some . unTerminal
176 instance Gram_RegL EBNF where
177 Reg f <*. Terminal x = Reg $ f <*> x
178 manyL = Reg . many . unTerminal
179 someL = Reg . some . unTerminal
180 instance Gram_CF EBNF where
181 CF (EBNF f) <& Reg (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $
182 f bo (op, L) <> " & " <> g bo (op, R)
183 where op = infixB L 4
184 Reg (EBNF f) &> CF (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $
185 f bo (op, L) <> " & " <> g bo (op, R)
186 where op = infixB L 4
187 CF (EBNF f) `minus` Reg (EBNF g) = CF $ EBNF $ \bo po -> infix_paren po op $
188 f bo (op, L) <> " - " <> g bo (op, R)
189 where op = infixL 6
190 instance Gram_Meta meta EBNF where
191 metaG (EBNF x) = EBNF x
192 instance Gram_Lexer EBNF
193
194 gram_lexer :: forall g . (Gram_Lexer g, Gram_RuleDef g) => [CF g ()]
195 gram_lexer =
196 [ void $ commentable (void $ rule_arg "space") (void $ rule_arg "line") (void $ rule_arg "block")
197 , void $ comment_line (rule_arg "prefix")
198 , void $ comment_block (rule_arg "start") (rule_arg "end" :: RegL g String)
199 , void $ lexeme (rule_arg "g")
200 , void $ parens (rule_arg "g")
201 , void $ operators (rule_arg "expr") (rule_arg "prefix") (rule_arg "infix") (rule_arg "postfix")
202 , void $ inside id (rule_arg "begin") (rule_arg "i") (rule_arg "end") (rule_arg "next")
203 ]