{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE StandaloneDeriving #-} -- | This module defines symantics -- for regular or context-free grammars. -- -- The default grammar can be printed in 'EBNF' -- with: @cabal test ebnf --show-details=always@. module Language.Symantic.Parsing.Grammar where import Control.Applicative (Applicative(..)) import Control.Monad import qualified Data.Bool as Bool import qualified Data.Char as Char import Data.Foldable hiding (any) import Data.Semigroup hiding (option) import Data.String (IsString(..)) import Data.Text (Text) import Prelude hiding (any) -- * Class 'Gram_Rule' type Id a = a -> a class Gram_Rule g where rule :: Text -> Id (g a) rule _n = id rule1 :: Text -> Id (g a -> g b) rule1 _n g = g rule2 :: Text -> Id (g a -> g b -> g c) rule2 _n g = g rule3 :: Text -> Id (g a -> g b -> g c -> g d) rule3 _n g = g rule4 :: Text -> Id (g a -> g b -> g c -> g d -> g e) rule4 _n g = g -- * Type 'Terminal' -- | Terminal grammar. newtype Terminal g a = Terminal { unTerminal :: g a } deriving (Functor, Gram_Terminal) deriving instance Gram_Rule g => Gram_Rule (Terminal g) -- ** Class 'Gram_Terminal' -- | Symantics for terminal grammars. class Gram_Terminal g where any :: g Char but :: Terminal g Char -> Terminal g Char -> Terminal g Char eoi :: g () char :: Char -> g Char string :: String -> g String unicat :: Unicat -> g Char range :: (Char, Char) -> g Char -- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "") -- string [] = pure [] -- string (c:cs) = (:) <$> char c <*> string cs -- *** Type 'Unicat' -- | Unicode category. data Unicat = Unicat_Letter | Unicat_Mark | Unicat_Number | Unicat_Punctuation | Unicat_Symbol | Unicat Char.GeneralCategory deriving (Eq, Show) unicode_categories :: Unicat -> [Char.GeneralCategory] unicode_categories c = case c of Unicat_Letter -> [ Char.UppercaseLetter , Char.LowercaseLetter , Char.TitlecaseLetter , Char.ModifierLetter , Char.OtherLetter ] Unicat_Mark -> [ Char.NonSpacingMark , Char.SpacingCombiningMark , Char.EnclosingMark ] Unicat_Number -> [ Char.DecimalNumber , Char.LetterNumber , Char.OtherNumber ] Unicat_Punctuation -> [ Char.ConnectorPunctuation , Char.DashPunctuation , Char.OpenPunctuation , Char.ClosePunctuation , Char.OtherPunctuation ] Unicat_Symbol -> [ Char.MathSymbol , Char.CurrencySymbol , Char.ModifierSymbol , Char.OtherSymbol ] Unicat cat -> [cat] -- * Type 'Reg' -- | Left or right regular grammar. newtype Reg (lr::LR) g a = Reg { unReg :: g a } deriving (IsString, Functor, Gram_Terminal) deriving instance Alter g => Alter (Reg lr g) deriving instance Gram_Rule g => Gram_Rule (Reg lr g) deriving instance (Functor g, Alter g, Gram_RegL g) => Gram_RegL (RegL g) deriving instance (Functor g, Alter g, Gram_RegR g) => Gram_RegR (RegR g) reg_of_term :: Terminal g a -> Reg lr g a reg_of_term (Terminal g) = Reg g -- ** Type 'LR' data LR = L -- ^ Left | R -- ^ Right deriving (Eq, Show) type RegL = Reg 'L type RegR = Reg 'R -- ** Class 'Alter' -- | Like 'Alternative' but without the 'Applicative' super-class, -- because a regular grammar is not closed under 'Applicative'. -- And also because the alternative operator has to backtrack -- when the first alternative fails. class Alter g where empty :: g a (<+>) :: g a -> g a -> g a choice :: [g a] -> g a choice = foldr (<+>) empty deriving instance Alter p => Alter (Terminal p) infixl 3 <+> -- ** Class 'Gram_RegR' -- | Symantics for right regular grammars. class (Functor g, Alter g) => Gram_RegR g where (.*>) :: Terminal g (a -> b) -> RegR g a -> RegR g b manyR :: Terminal g a -> RegR g [a] manyR g = (:) <$> g .*> manyR g <+> empty someR :: Terminal g a -> RegR g [a] someR g = (:) <$> g .*> manyR g infixl 4 .*> -- ** Class 'Gram_RegL' -- | Symantics for left regular grammars. class (Functor g, Alter g) => Gram_RegL g where (<*.) :: RegL g (a -> b) -> Terminal g a -> RegL g b manyL :: Terminal g a -> RegL g [a] manyL g' = reverse <$> go g' where go g = flip (:) <$> go g <*. g <+> empty someL :: Terminal g a -> RegL g [a] someL g = (\cs c -> cs ++ [c]) <$> manyL g <*. g infixl 4 <*. -- * Class 'Alt' class (Alter g, Applicative g) => Alt g where option :: a -> g a -> g a option x g = g <+> pure x optional :: g a -> g (Maybe a) optional v = Just <$> v <+> pure Nothing many :: g a -> g [a] many a = some a <+> pure [] some :: g a -> g [a] some a = (:) <$> a <*> many a skipMany :: g a -> g () skipMany = void . many --manyTill :: g a -> g end -> g [a] --manyTill g end = go where go = ([] <$ end) <|> ((:) <$> g <*> go) -- * Class 'App' class Applicative g => App g where between :: g open -> g close -> g a -> g a between open close g = open *> g <* close -- * Type 'CF' -- | Context-free grammar. newtype CF g a = CF { unCF :: g a } deriving (IsString, Functor, Gram_Terminal, Applicative, App) deriving instance Alter g => Alter (CF g) deriving instance Alt g => Alt (CF g) deriving instance Gram_Rule g => Gram_Rule (CF g) deriving instance Gram_RegL g => Gram_RegL (CF g) deriving instance Gram_RegR g => Gram_RegR (CF g) deriving instance Gram_CF g => Gram_CF (CF g) cf_of_term :: Terminal g a -> CF g a cf_of_term (Terminal g) = CF g cf_of_reg :: Reg lr g a -> CF g a cf_of_reg (Reg g) = CF g -- ** Class 'Gram_CF' -- | Symantics for context-free grammars. class Gram_CF g where -- | NOTE: CFL ∩ RL is a CFL. -- See ISBN 81-7808-347-7, Theorem 7.27, g.286 (<&) :: CF g (a -> b) -> Reg lr g a -> CF g b (&>) :: Reg lr g (a -> b) -> CF g a -> CF g b -- | NOTE: CFL - RL is a CFL. -- See ISBN 81-7808-347-7, Theorem 7.29, g.289 minus :: CF g a -> Reg lr g b -> CF g a infixl 4 <& infixl 4 &> -- * Class 'Gram_Meta' class Gram_Meta meta g where metaG :: g (meta -> a) -> g a instance Gram_Meta meta g => Gram_Meta meta (CF g) where metaG = CF . metaG . unCF -- * Class 'Gram_Lexer' class ( Alt g , Alter g , App g , Gram_CF g , Gram_Rule g , Gram_Terminal g ) => Gram_Lexer g where commentable :: g () -> g () -> g () -> g () commentable = rule3 "commentable" $ \g line block -> skipMany $ choice [g, line, block] comment_line :: CF g String -> CF g String comment_line prefix = rule "comment_line" $ prefix *> many (any `minus` (void (char '\n') <+> eoi)) comment_block :: CF g String -> Reg lr g String -> CF g String comment_block start end = rule "comment_block" $ start *> many (any `minus` void end) lexeme :: CF g a -> CF g a lexeme = rule1 "lexeme" $ \g -> g <* commentable (void $ char ' ') (void $ comment_line (string "--")) (void $ comment_block (string "{-") (string "-}")) parens :: CF g a -> CF g a parens = rule1 "parens" $ between (lexeme $ string "(") (lexeme $ string ")") operators :: CF g a -- ^ expression -> CF g (Unifix, a -> a) -- ^ prefix operator -> CF g (Infix , a -> a -> a) -- ^ infix operator -> CF g (Unifix, a -> a) -- ^ postfix operator -> CF g (Either Error_Fixity a) operators g prG iG poG = (evalOpTree <$>) <$> go g prG iG poG where go :: CF g a -> CF g (Unifix, a -> a) -> CF g (Infix , a -> a -> a) -> CF g (Unifix, a -> a) -> CF g (Either Error_Fixity (OpTree a)) go = rule4 "operators" $ \aG preG inG postG -> (\pres a posts -> let nod_a = foldr insert_unifix (foldl' (flip insert_unifix) (OpNode0 a) posts) pres in \case Just (in_, b) -> insert_infix nod_a in_ b Nothing -> Right nod_a) <$> many preG <*> aG <*> many postG <*> option Nothing (curry Just <$> inG <*> go aG preG inG postG) insert_unifix :: (Unifix, a -> a) -> OpTree a -> OpTree a insert_unifix a@(uni_a@(Prefix prece_a), op_a) nod_b = case nod_b of OpNode0{} -> OpNode1 uni_a op_a nod_b OpNode1 Prefix{} _op_b _nod -> OpNode1 uni_a op_a nod_b OpNode1 uni_b@(Postfix prece_b) op_b nod -> case prece_b `compare` prece_a of GT -> OpNode1 uni_a op_a nod_b EQ -> OpNode1 uni_a op_a nod_b LT -> OpNode1 uni_b op_b $ insert_unifix a nod OpNode2 inf_b op_b l r -> case infix_prece inf_b `compare` prece_a of GT -> OpNode1 uni_a op_a nod_b EQ -> OpNode1 uni_a op_a nod_b LT -> OpNode2 inf_b op_b (insert_unifix a l) r insert_unifix a@(uni_a@(Postfix prece_a), op_a) nod_b = case nod_b of OpNode0{} -> OpNode1 uni_a op_a nod_b OpNode1 uni_b@(Prefix prece_b) op_b nod -> case prece_b `compare` prece_a of GT -> OpNode1 uni_a op_a nod_b EQ -> OpNode1 uni_a op_a nod_b LT -> OpNode1 uni_b op_b $ insert_unifix a nod OpNode1 Postfix{} _op_b _nod -> OpNode1 uni_a op_a nod_b OpNode2 inf_b op_b l r -> case infix_prece inf_b `compare` prece_a of GT -> OpNode1 uni_a op_a nod_b EQ -> OpNode1 uni_a op_a nod_b LT -> OpNode2 inf_b op_b l (insert_unifix a r) insert_infix :: OpTree a -> (Infix, a -> a -> a) -> Either Error_Fixity (OpTree a) -> Either Error_Fixity (OpTree a) insert_infix nod_a in_@(inf_a, op_a) e_nod_b = do nod_b <- e_nod_b case nod_b of OpNode0{} -> Right $ OpNode2 inf_a op_a nod_a nod_b OpNode1 uni_b op_b nod -> case unifix_prece uni_b `compare` infix_prece inf_a of EQ -> Right $ OpNode2 inf_a op_a nod_a nod_b GT -> Right $ OpNode2 inf_a op_a nod_a nod_b LT -> do n <- insert_infix nod_a in_ (Right nod) Right $ OpNode1 uni_b op_b n OpNode2 inf_b op_b l r -> case infix_prece inf_b `compare` infix_prece inf_a of GT -> Right $ OpNode2 inf_a op_a nod_a nod_b LT -> do n <- insert_infix nod_a in_ (Right l) Right $ OpNode2 inf_b op_b n r EQ -> let ass = \case AssocL -> L AssocR -> R AssocB lr -> lr in case (ass <$> infix_assoc inf_b, ass <$> infix_assoc inf_a) of (Just L, Just L) -> do n <- insert_infix nod_a in_ (Right l) Right $ OpNode2 inf_b op_b n r (Just R, Just R) -> Right $ OpNode2 inf_a op_a nod_a nod_b _ -> Left $ Error_Fixity_Infix_not_combinable inf_a inf_b -- NOTE: non-associating infix ops -- of the same precedence cannot be mixed. infixrG :: CF g a -> CF g (a -> a -> a) -> CF g a infixrG = rule2 "infixr" $ \g opG -> (\a -> \case Just (op, b) -> a `op` b Nothing -> a) <$> g <*> option Nothing (curry Just <$> opG <*> infixrG g opG) infixlG :: CF g a -> CF g (a -> a -> a) -> CF g a infixlG = rule2 "infixl" $ \g opG -> -- NOTE: infixl uses the same grammar than infixr, -- but build the parsed value by applying -- the operator in the opposite way. ($ id) <$> go g opG where go :: CF g a -> CF g (a -> a -> a) -> CF g ((a -> a) -> a) go g opG = (\a -> \case Just (op, kb) -> \k -> kb (k a `op`) Nothing -> ($ a)) <$> g <*> option Nothing (curry Just <$> opG <*> go g opG) inside :: (a -> b) -> CF g begin -> CF g a -> CF g end -> CF g b -> CF g b inside f = rule4 "inside" $ \begin i end n -> (f <$ begin <*> i <* end) <+> n symbol :: String -> CF g String symbol = lexeme . string deriving instance Gram_Lexer g => Gram_Lexer (CF g) -- ** Type 'Error_Fixity' data Error_Fixity = Error_Fixity_Infix_not_combinable Infix Infix | Error_Fixity_NeedPostfixOrInfix | Error_Fixity_NeedPrefix | Error_Fixity_NeedPostfix | Error_Fixity_NeedInfix deriving (Eq, Show) -- ** Type 'NeedFixity' data NeedFixity = NeedPrefix | NeedPostfix | NeedPostfixOrInfix deriving (Eq, Ord, Show) -- ** Type 'Fixity' data Fixity a = FixityPrefix Unifix (a -> a) | FixityPostfix Unifix (a -> a) | FixityInfix Infix (a -> a -> a) -- ** Type 'Precedence' type Precedence = Int -- ** Type 'Associativity' -- type Associativity = LR data Associativity = AssocL -- ^ Associate to the left: @a ¹ b ² c == (a ¹ b) ² c@ | AssocR -- ^ Associate to the right: @a ¹ b ² c == a ¹ (b ² c)@ | AssocB LR -- ^ Associate to both side, but to 'LR' when reading. deriving (Eq, Show) -- ** Type 'Unifix' data Unifix = Prefix { unifix_prece :: Precedence } | Postfix { unifix_prece :: Precedence } deriving (Eq, Show) -- ** Type 'Infix' data Infix = Infix { infix_assoc :: Maybe Associativity , infix_prece :: Precedence } deriving (Eq, Show) infixL :: Precedence -> Infix infixL = Infix (Just AssocL) infixR :: Precedence -> Infix infixR = Infix (Just AssocR) infixB :: LR -> Precedence -> Infix infixB = Infix . Just . AssocB infixN :: Precedence -> Infix infixN = Infix Nothing infixN0 :: Infix infixN0 = infixN 0 infixN5 :: Infix infixN5 = infixN 5 infix_paren :: (Semigroup s, IsString s) => (Infix, LR) -> Infix -> s -> s infix_paren (po, lr) op s = if infix_prece op < infix_prece po || infix_prece op == infix_prece po && Bool.not associate then fromString "(" <> s <> fromString ")" else s where associate = case (lr, infix_assoc po) of (_, Just AssocB{}) -> True (L, Just AssocL) -> True (R, Just AssocR) -> True _ -> False -- ** Type 'OpTree' data OpTree a = OpNode0 a | OpNode1 Unifix (a -> a) (OpTree a) | OpNode2 Infix (a -> a -> a) (OpTree a) (OpTree a) -- | Collapse an 'OpTree'. evalOpTree :: OpTree a -> a evalOpTree (OpNode0 a) = a evalOpTree (OpNode1 _uni op n) = op $ evalOpTree n evalOpTree (OpNode2 _inf op l r) = evalOpTree l `op` evalOpTree r