1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DataKinds #-}
3 {-# LANGUAGE DefaultSignatures #-}
4 {-# LANGUAGE DeriveFunctor #-}
5 {-# LANGUAGE FlexibleContexts #-}
6 {-# LANGUAGE FlexibleInstances #-}
8 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
9 {-# LANGUAGE LambdaCase #-}
10 {-# LANGUAGE NamedFieldPuns #-}
11 {-# LANGUAGE NoMonomorphismRestriction #-}
12 {-# LANGUAGE OverloadedStrings #-}
13 {-# LANGUAGE PolyKinds #-}
14 {-# LANGUAGE Rank2Types #-}
15 {-# LANGUAGE StandaloneDeriving #-}
16 {-# LANGUAGE TypeFamilies #-}
17 {-# OPTIONS_GHC -fno-warn-tabs #-}
18 -- | This module defines symantics
19 -- for regular or context-free grammars.
20 module Language.Symantic.Parsing.Grammar where
22 import Control.Applicative (Applicative(..), Alternative(..))
24 import Data.Char as Char
25 import Data.Foldable hiding (any)
26 import qualified Data.List as List
27 import Data.Semigroup hiding (option)
28 import Data.String (IsString(..))
29 import Data.Text (Text)
30 import qualified Data.Text as Text
31 import Prelude hiding (any)
33 -- * Class 'Gram_Rule'
35 class Gram_Rule p where
36 rule :: Text -> Id (p a)
38 rule1 :: Text -> Id (p a -> p b)
40 rule2 :: Text -> Id (p a -> p b -> p c)
42 rule3 :: Text -> Id (p a -> p b -> p c -> p d)
44 rule4 :: Text -> Id (p a -> p b -> p c -> p d -> p e)
48 -- | Terminal grammar.
50 = Term { unTerm :: p a }
51 deriving (Functor, Gram_Term)
53 -- ** Class 'Gram_Term'
54 -- | Symantics for terminal grammars.
55 class Gram_Term p where
58 char :: Char -> p Char
59 string :: String -> p String
60 unicat :: Unicat -> p Char
61 range :: (Char, Char) -> p Char
62 -- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "")
63 -- string [] = pure []
64 -- string (c:cs) = (:) <$> char c <*> string cs
67 -- | Unicode category.
74 | Unicat Char.GeneralCategory
77 unicode_categories :: Unicat -> [Char.GeneralCategory]
78 unicode_categories c =
89 , SpacingCombiningMark
98 [ ConnectorPunctuation
113 -- | Left or right regular grammar.
114 newtype Reg (lr::LR) p a = Reg { unReg :: p a }
115 deriving (IsString, Functor, Gram_Term, Alter)
116 deriving instance Gram_Rule p => Gram_Rule (Reg lr p)
117 deriving instance (Functor p, Alter p, Gram_RegL p) => Gram_RegL (RegL p)
118 deriving instance (Functor p, Alter p, Gram_RegR p) => Gram_RegR (RegR p)
129 -- | Like 'Alternative' but without the 'Applicative' super-class,
130 -- because a regular grammar is not closed under 'Applicative'.
133 (<+>) :: p a -> p a -> p a
134 choice :: [p a] -> p a
135 default nil :: Alternative p => p a
136 default (<+>) :: Alternative p => p a -> p a -> p a
137 default choice :: Alternative p => [p a] -> p a
140 choice = foldr (<+>) empty
143 -- ** Class 'Gram_RegR'
144 -- | Symantics for right regular grammars.
145 class (Functor p, Alter p) => Gram_RegR p where
146 (.*>) :: Term p (a -> b) -> RegR p a -> RegR p b
147 manyR :: Term p a -> RegR p [a]
148 manyR p = (:) <$> p .*> manyR p <+> nil
149 someR :: Term p a -> RegR p [a]
150 someR p = (:) <$> p .*> manyR p
153 -- ** Class 'Gram_RegL'
154 -- | Symantics for left regular grammars.
155 class (Functor p, Alter p) => Gram_RegL p where
156 (<*.) :: RegL p (a -> b) -> Term p a -> RegL p b
157 manyL :: Term p a -> RegL p [a]
158 manyL p' = reverse <$> go p'
159 where go p = flip (:) <$> go p <*. p <+> nil
160 someL :: Term p a -> RegL p [a]
161 someL p = (\cs c -> cs ++ [c]) <$> manyL p <*. p
165 -- | Context-free grammar.
166 newtype CF p a = CF { unCF :: p a }
167 deriving (IsString, Functor, Gram_Term, Applicative, App, Alternative, Alter, Alt)
168 deriving instance Gram_Rule p => Gram_Rule (CF p)
169 deriving instance Gram_CF p => Gram_CF (CF p)
171 cf_of_reg :: Reg lr p a -> CF p a
172 cf_of_reg (Reg p) = CF p
174 -- ** Class 'Gram_CF'
175 -- | Symantics for context-free grammars.
176 class Gram_CF p where
177 -- | NOTE: CFL ∩ RL is a CFL.
178 -- See ISBN 81-7808-347-7, Theorem 7.27, p.286
179 (<&) :: CF p (a -> b) -> Reg lr p a -> CF p b
180 (&>) :: Reg lr p (a -> b) -> CF p a -> CF p b
181 -- | NOTE: CFL - RL is a CFL.
182 -- See ISBN 81-7808-347-7, Theorem 7.29, p.289
183 but :: CF p a -> Reg lr p b -> CF p a
188 class Applicative p => App p where
189 between :: p open -> p close -> p a -> p a
190 between open close p = open *> p <* close
193 class Alternative p => Alt p where
194 option :: a -> p a -> p a
195 option x p = p <|> pure x
196 skipMany :: p a -> p ()
197 skipMany = void . many
198 --manyTill :: p a -> p end -> p [a]
199 --manyTill p end = go where go = ([] <$ end) <|> ((:) <$> p <*> go)
202 -- | Extended Bachus-Norm Form, following the
203 -- <http://standards.iso.org/ittf/PubliclyAvailableStandards/s026153_ISO_IEC_14977_1996(E).zip ISO-IEC-14977>
204 -- notations, augmented with the following notations:
206 -- * @("U+", code_point)@: for <http://unicode.org/versions/Unicode8.0.0/ ISO-IEC-10646> (aka. Unicode).
207 -- * @(rule, "&", rule)@: for the intersection.
208 -- * @(rule, "-", rule)@: for the difference.
209 -- * @(rule, " ", rule)@: for rule application.
210 data EBNF a = EBNF { unEBNF :: RuleMode -> (Op, LR) -> Text }
212 runEBNF :: EBNF a -> Text
213 runEBNF (EBNF p) = p RuleMode_Body (nop, L)
215 -- | Get textual rendition of given EBNF rule.
216 renderEBNF :: RuleDef a -> Text
217 renderEBNF = runEBNF . unRuleDef
219 ebnf_const :: Text -> EBNF a
220 ebnf_const t = EBNF $ \_rm _op -> t
223 newtype RuleDef a = RuleDef { unRuleDef :: EBNF a }
224 deriving (Functor, Gram_Term, Applicative, App
225 , Alternative, Alter, Alt, Gram_RegL, Gram_RegR, Gram_CF)
226 deriving instance Gram_RuleDef RuleDef
227 deriving instance Gram_RuleDef p => Gram_RuleDef (RegR p)
228 deriving instance Gram_RuleDef p => Gram_RuleDef (RegL p)
229 deriving instance Gram_RuleDef p => Gram_RuleDef (CF p)
231 instance Gram_Rule RuleDef where
232 rule n = rule_def (ebnf_const n)
233 rule1 n p a = rule_def (ebnf_const n `ebnf_arg` unRuleDef a) (p a)
234 rule2 n p a b = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b) (p a b)
235 rule3 n p a b c = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c) (p a b c)
236 rule4 n p a b c d = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c `ebnf_arg` unRuleDef d) (p a b c d)
238 -- *** Class 'Gram_RuleDef'
239 class Gram_RuleDef p where
240 rule_def :: EBNF () -> p a -> RuleDef a
241 rule_arg :: Text -> p a
243 -- | Helper for 'Gram_Rule' 'EBNF'.
244 ebnf_arg :: EBNF a -> EBNF b -> EBNF ()
245 ebnf_arg (EBNF a) (EBNF b) = EBNF $ \bo po -> op_paren po op $
246 a bo (op, L) <> " " <> b bo (op, R)
247 where op = Op " " 11 AssocL
250 instance Gram_RuleDef EBNF where
251 rule_arg = ebnf_const
253 RuleDef $ EBNF $ \mo po ->
255 RuleMode_Ref -> unEBNF call mo po
257 Text.intercalate " " $ concat $
258 [ [unEBNF call RuleMode_Ref (nop, L)]
260 , [unEBNF body RuleMode_Ref (nop, R)]
263 instance IsString (EBNF String) where
265 instance Show (EBNF a) where
266 show = Text.unpack . runEBNF
267 instance Gram_Rule EBNF where
268 rule n p = EBNF $ \rm po ->
270 RuleMode_Body -> unEBNF p RuleMode_Ref po
272 rule1 n p a = EBNF $ \rm po ->
274 RuleMode_Body -> unEBNF (p a) RuleMode_Ref po
275 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a) RuleMode_Ref po
276 rule2 n p a b = EBNF $ \rm po ->
278 RuleMode_Body -> unEBNF (p a b) RuleMode_Ref po
279 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b) RuleMode_Ref po
280 rule3 n p a b c = EBNF $ \rm po ->
282 RuleMode_Body -> unEBNF (p a b c) RuleMode_Ref po
283 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c) RuleMode_Ref po
284 rule4 n p a b c d = EBNF $ \rm po ->
286 RuleMode_Body -> unEBNF (p a b c d) RuleMode_Ref po
287 RuleMode_Ref -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c `ebnf_arg` d) RuleMode_Ref po
288 instance Functor EBNF where
289 fmap _f (EBNF x) = EBNF x
290 instance Applicative EBNF where
292 EBNF f <*> EBNF x = EBNF $ \bo po -> op_paren po op $
293 f bo (op, L) <> ", " <> x bo (op, R)
294 where op = Op "," 10 AssocB
296 instance Alternative EBNF where
297 empty = ebnf_const $ "\"\""
298 EBNF x <|> EBNF y = EBNF $ \bo po -> op_paren po op $
299 x bo (op, L) <> " | " <> y bo (op, R)
300 where op = Op "|" 2 AssocB
301 many (EBNF x) = EBNF $ \rm _po -> "{ " <> x rm (op, L) <> " }" where op = nop
302 some (EBNF x) = EBNF $ \rm _po -> "{ " <> x rm (op, L) <> " }-" where op = nop
303 instance Alter EBNF where
306 choice l@(_:_) = EBNF $ \bo po -> op_paren po op $
307 Text.intercalate " | " $
308 (unEBNF <$> l) <*> pure bo <*> pure (op, L)
309 where op = Op "|" 2 AssocB
311 instance Gram_Term EBNF where
313 eof = ebnf_const "EOF"
314 char = ebnf_const . escape
316 escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""]
317 escape c = Text.concat ["U+", Text.pack $ show $ ord c]
319 case List.break (\c -> not (Char.isPrint c) || c == '"') s of
321 ("", [c]) -> "" <$ char c
322 (ps, [c]) -> "" <$ raw ps <* char c
323 ("", c:rs) -> "" <$ char c <* string rs
324 (ps, c:rs) -> "" <$ raw ps <* char c <* string rs
326 raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""]
327 unicat = ebnf_const . Text.pack . show
328 range (l, h) = ebnf_const $ Text.concat
333 instance Gram_RegR EBNF where
334 Term f .*> Reg x = Reg $ f <*> x
335 manyR = Reg . many . unTerm
336 someR = Reg . some . unTerm
337 instance Gram_RegL EBNF where
338 Reg f <*. Term x = Reg $ f <*> x
339 manyL = Reg . many . unTerm
340 someL = Reg . some . unTerm
341 instance Gram_CF EBNF where
342 CF (EBNF f) <& Reg (EBNF p) = CF $ EBNF $ \bo po -> op_paren po op $
343 f bo (op, L) <> " & " <> p bo (op, R)
344 where op = Op "&" 4 AssocL
345 Reg (EBNF f) &> CF (EBNF p) = CF $ EBNF $ \bo po -> op_paren po op $
346 f bo (op, L) <> " & " <> p bo (op, R)
347 where op = Op "&" 4 AssocL
348 CF (EBNF f) `but` Reg (EBNF p) = CF $ EBNF $ \bo po -> op_paren po op $
349 f bo (op, L) <> " - " <> p bo (op, R)
350 where op = Op "-" 6 AssocL
352 -- ** Type 'RuleMode'
354 = RuleMode_Body -- ^ Generate the body of the rule.
355 | RuleMode_Ref -- ^ Generate a ref to the rule.
361 , op_prece :: Precedence
362 , op_assoc :: Associativity
363 } deriving (Eq, Show)
368 -- *** Type 'Precedence'
369 type Precedence = Int
371 -- *** Type 'Associativity'
373 = AssocL | AssocR | AssocN | AssocB
377 :: (Semigroup s, IsString s)
378 => (Op, LR) -> Op -> s -> s
379 op_paren (po, lr) op s =
380 if op_prece op <= op_prece po && not associate
381 then fromString "(" <> s <> fromString ")"
385 op_ident po == op_ident op &&
386 case (lr, op_assoc po) of
392 -- * Class 'Gram_Context'
393 -- | A monadic backdoor, but limited by 'Context'.
394 -- In 'CF', the context must obviously not be used to change the parser,
395 -- but it can be used to change the parsed value,
396 -- for instance by recording source positions into it.
397 class Gram_Context p where
400 default context :: (Context p ~ ()) => (Context p -> p a) -> p a
401 context :: (Context p -> p a) -> p a
403 instance Gram_Context p => Gram_Context (CF p) where
404 type Context (CF p) = Context p
405 context f = CF $ context (unCF . f)
406 instance Gram_Context EBNF
407 instance Gram_Context RuleDef
409 -- * Class 'Gram_Lexer'
418 ) => Gram_Lexer p where
419 commentable :: p () -> p () -> p () -> p ()
420 commentable = rule3 "commentable" $ \p line block ->
421 skipMany $ choice [p, line, block]
422 comment_line :: CF p String -> CF p String
423 comment_line prefix = rule "comment_line" $
424 prefix *> many (any `but` (void (char '\n') <+> eof))
425 comment_block :: CF p String -> Reg lr p String -> CF p String
426 comment_block start end = rule "comment_block" $
427 start *> many (any `but` void end)
428 lexeme :: CF p a -> CF p a
429 lexeme = rule1 "lexeme" $ \p -> p
432 (void $ comment_line (string "--"))
433 (void $ comment_block (string "{-") (string "-}"))
434 parens :: CF p a -> CF p a
435 parens = rule1 "parens" $
437 (lexeme $ string "(")
438 (lexeme $ string ")")
439 infixrP :: (a -> a -> a) -> CF p a -> CF p sep -> CF p a -> CF p a
441 rule3 "infixrP" $ \next sep root ->
442 (\a -> \case Just b -> f a b; Nothing -> a)
443 <$> next <*> option Nothing (Just <$ sep <*> root)
444 inside :: (a -> b) -> CF p begin -> CF p a -> CF p end -> CF p b -> CF p b
445 inside f = rule4 "inside" $ \begin i end n ->
446 (f <$ begin <*> i <* end) <+> n
447 symbol :: String -> CF p String
448 symbol = lexeme . string
450 deriving instance Gram_Lexer p => Gram_Lexer (CF p)
451 instance Gram_Lexer EBNF
452 instance Gram_Lexer RuleDef
454 gram_lexer :: forall p . (Gram_Lexer p, Gram_RuleDef p) => [CF p ()]
456 [ () <$ commentable (void $ rule_arg "space") (void $ rule_arg "line") (void $ rule_arg "block")
457 , () <$ comment_line (rule_arg "prefix")
458 , () <$ comment_block (rule_arg "start") (rule_arg "end" :: Reg 'L p String)
459 , () <$ lexeme (rule_arg "p")
460 , () <$ parens (rule_arg "p")
461 , () <$ inside id (rule_arg "begin") (rule_arg "i") (rule_arg "end") (rule_arg "next")
462 , () <$ infixrP const (rule_arg "next") (rule_arg "sep") (rule_arg "root")