{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- | Grammar of terms: the term token type, per-symantic tokenizers,
-- and the grammar classes parsing terms into an 'AST_Term'.
module Language.Symantic.Compiling.Term.Grammar where

import Control.Arrow (left)
import Control.Monad (void)
import Data.Map.Strict (Map)
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.Text (Text)
import Prelude hiding (mod, not, any)
import qualified Data.Char as Char
import qualified Data.Function as Fun
import qualified Data.Map.Strict as Map
import qualified Data.Text as Text

import Language.Symantic.Parsing
import Language.Symantic.Typing

-- * Type 'Mod'

-- | A module path: a sequence of module-name components.
type Mod_Path = [Mod_Name]

-- | A single module-name component.
newtype Mod_Name = Mod_Name Text
  deriving (Eq, Ord, Show)

-- | Something qualified by a 'Mod_Path'.
data Mod a = Mod Mod_Path a
  deriving (Eq, Functor, Ord, Show)

-- * Type 'AST_Term'
-- type AST_Term src ss = BinTree (EToken src ss)

-- | Abstract syntax tree of a term:
-- a binary tree of applications over 'Token_Term' leaves.
type AST_Term src ss = BinTree (Token_Term src ss)

-- ** Type 'Token_Term'

-- | A leaf of an 'AST_Term'.
data Token_Term src ss
  = Token_Term (EToken src ss)
    -- ^ A token injected by one of the symantics in @ss@.
  | Token_Term_Abst src TeName (AST_Type src) (AST_Term src ss)
    -- ^ Lambda abstraction: bound name, its type, and the body.
  | Token_Term_App src
    -- (AST_Term src ss) (AST_Term src ss)
  | Token_Term_Var src TeName
    -- ^ A variable occurrence.
  | Token_Term_Let src TeName (AST_Term src ss) (AST_Term src ss)
    -- ^ @let@ binding: bound name, bound term, and the body.
  {- | Token_Term_Type (AST_Type src) -}

-- NOTE: the @src@ annotations are deliberately ignored by '(==)':
-- two tokens are equal when their payloads are, wherever they come from.
instance Eq_Token ss => Eq (Token_Term src ss) where
  Token_Term x == Token_Term y = x == y
  Token_Term_Abst _ nx vx bx == Token_Term_Abst _ ny vy by =
    nx == ny && vx == vy && bx == by
  Token_Term_App _ == Token_Term_App _ = True
  Token_Term_Var _ nx == Token_Term_Var _ ny = nx == ny
  Token_Term_Let _ nx vx bx == Token_Term_Let _ ny vy by =
    nx == ny && vx == vy && bx == by
  _ == _ = False

instance Show_Token ss => Show (Token_Term src ss) where
  showsPrec p (Token_Term t) = showsPrec p t
  showsPrec _p (Token_Term_App _) = showString "Token_Term_App"
  showsPrec p (Token_Term_Abst _ n v b) =
    showParen (p > 10) $
    showString "Token_Term_Abst" .
    showChar ' ' . showsPrec 11 n .
    showChar ' ' . showsPrec 11 v .
    showChar ' ' . showsPrec 11 b
  showsPrec p (Token_Term_Var _ n) =
    showParen (p > 10) $
    showString "Token_Term_Var" .
    showChar ' ' . showsPrec 11 n
  showsPrec p (Token_Term_Let _ n v b) =
    showParen (p > 10) $
    showString "Token_Term_Let" .
    showChar ' ' . showsPrec 11 n .
    showChar ' ' . showsPrec 11 v .
    showChar ' ' . showsPrec 11 b

-- * Class 'Tokenize'

-- | Collect the 'Tokenizers' of every symantic in @ss@.
type Tokenize src ss = TokenizeR src ss ss

-- ** Type 'Tokenizer'

-- | How to build a 'Token_Term' from a source location,
-- together with its fixity (@fixy@ is 'Unifix' or 'Infix').
data Tokenizer fixy src ss = Tokenizer
  { tokenizer        :: src -> Token_Term src ss
  , tokenizer_fixity :: fixy
  }

-- ** Type 'Tokenizers'

-- | Tokenizers indexed by module path and name,
-- split by the syntactic position they may occur in.
data Tokenizers src ss = Tokenizers
  { tokenizers_prefix  :: Map Mod_Path (Map TeName (Tokenizer Unifix src ss))
  , tokenizers_infix   :: Map Mod_Path (Map TeName (Tokenizer Infix  src ss))
  , tokenizers_postfix :: Map Mod_Path (Map TeName (Tokenizer Unifix src ss))
  }
deriving instance
  ( Show (Tokenizer Unifix src ss)
  , Show (Tokenizer Infix src ss)
  ) => Show (Tokenizers src ss)

-- | Left-biased at the name level: on a name collision within a module,
-- the left operand's tokenizer wins ('Map.union').
instance Semigroup (Tokenizers src ss) where
  x <> y = Tokenizers
    (Map.unionWith Map.union (tokenizers_prefix  x) (tokenizers_prefix  y))
    (Map.unionWith Map.union (tokenizers_infix   x) (tokenizers_infix   y))
    (Map.unionWith Map.union (tokenizers_postfix x) (tokenizers_postfix y))
instance Monoid (Tokenizers src ss) where
  mempty  = Tokenizers Map.empty Map.empty Map.empty
  mappend = (<>)

-- | The 'Tokenizers' of all the symantics in @ss@.
tokenizers :: forall src ss. Tokenize src ss => Tokenizers src ss
tokenizers = tokenizeR (Proxy @ss)

-- | Lookup the given 'Mod' 'TeName' into the given 'Tokenizers',
-- returning for prefix, infix and postfix positions, when there is a match.
--
-- The unqualified name @\" \"@ (a single space) denotes juxtaposition
-- and resolves to a built-in, left-associative application tokenizer.
tokenizer_lookup
  :: forall src ss.
     Inj_Token ss (->)
  => Mod TeName
  -> Tokenizers src ss
  -> ( Maybe (Tokenizer Unifix src ss)
     , Maybe (Tokenizer Infix  src ss)
     , Maybe (Tokenizer Unifix src ss)
     )
tokenizer_lookup mn@(mod `Mod` n) (Tokenizers pres ins posts) =
  let pre  = Map.lookup mod pres  >>= Map.lookup n
      post = Map.lookup mod posts >>= Map.lookup n
      in_  =
        case mn of
         [] `Mod` " " -> Just
           Tokenizer
            { tokenizer        = Token_Term_App @src @ss
            , tokenizer_fixity = Infix (Just AssocL) 9
            }
         _ -> Map.lookup mod ins >>= Map.lookup n
  in (pre, in_, post)

-- ** Class 'TokenizeR'

-- | Fold 'tokenizeT' over the type-level list @rs@.
class TokenizeR src (ss::[*]) (rs::[*]) where
  tokenizeR :: Proxy rs -> Tokenizers src ss
instance TokenizeR src ss '[] where
  tokenizeR _rs = mempty
instance
  ( TokenizeT src ss s
  , TokenizeR src ss rs
  ) => TokenizeR src ss (Proxy s ': rs) where
  tokenizeR _ =
    tokenizeR (Proxy @rs) <>
    tokenizeT (Proxy @s)

-- ** Class 'TokenizeT'

-- | The 'Tokenizers' contributed by one symantic @s@
-- (none by default).
class TokenizeT src ss s where
  tokenizeT :: Proxy s -> Tokenizers src ss
  tokenizeT _t = mempty

  -- | Helper: a one-module table of tokenizers.
  tokenizeTMod
    :: Mod_Path
    -> [(TeName, Tokenizer fix src ss)]
    -> Map Mod_Path (Map TeName (Tokenizer fix src ss))
  tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl

  -- | Helper: a named tokenizer injecting the given token of @s@.
  token
    :: Inj_Token ss s
    => Text
    -> fixity
    -> TokenT ss (Proxy s)
    -> (TeName, Tokenizer fixity src ss)
  token n tokenizer_fixity tok =
    (TeName n,) Tokenizer
     { tokenizer = \src -> Token_Term $ inj_EToken src tok
     , tokenizer_fixity
     }

-- * Class 'Gram_Name'

-- | Grammar of module paths and term names (alphanumeric and symbolic).
class
  ( Alt g
  , Alter g
  , App g
  , Try g
  , Gram_CF g
  , Gram_Op g
  , Gram_Lexer g
  , Gram_RegL g
  , Gram_Rule g
  , Gram_Terminal g
  ) => Gram_Name g where
  g_mod_path :: CF g Mod_Path
  g_mod_path = rule "mod_path" $
    infixrG
     (pure <$> g_mod_name)
     (op <$ char '.')
    where op = (<>)
  -- | A module name: an uppercase-initial identifier
  -- that is not a term keyword.
  g_mod_name :: CF g Mod_Name
  g_mod_name = rule "mod_name" $
    (Mod_Name . Text.pack <$>) $
    (identG `minus`) $
    Fun.const
     <$> g_term_keywords
     <*. (any `but` g_term_idname_tail)
    where
    identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
    headG  = unicat $ Unicat Char.UppercaseLetter
  -- | A possibly-qualified term name, identifier or parenthesized operator.
  g_term_mod_name :: CF g (Mod TeName)
  g_term_mod_name = rule "term_mod_name" $
    lexeme $
     g_term_mod_idname <+>
     parens g_term_mod_opname
  g_term_name :: CF g TeName
  g_term_name = rule "term_name" $
    lexeme $
     g_term_idname <+>
     parens g_term_opname
  g_term_mod_idname :: CF g (Mod TeName)
  g_term_mod_idname = rule "term_mod_idname" $
    Mod
     <$> option [] (try $ g_mod_path <* char '.')
     <*> g_term_idname
  -- | An identifier that is not a term keyword.
  g_term_idname :: CF g TeName
  g_term_idname = rule "term_idname" $
    (TeName . Text.pack <$>) $
    (identG `minus`) $
    Fun.const
     <$> g_term_keywords
     <*. (any `but` g_term_idname_tail)
    where
    identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
    headG  = unicat Unicat_Letter
  g_term_idname_tail :: Terminal g Char
  g_term_idname_tail = rule "term_idname_tail" $
    unicat Unicat_Letter <+>
    unicat Unicat_Number
  g_term_keywords :: Reg rl g String
  g_term_keywords = rule "term_keywords" $
    choice $ string <$> ["in", "let"]
  g_term_mod_opname :: CF g (Mod TeName)
  g_term_mod_opname = rule "term_mod_opname" $
    Mod
     <$> option [] (try $ g_mod_path <* char '.')
     <*> g_term_opname
  -- | A symbolic operator name that is not a reserved symbol.
  g_term_opname :: CF g TeName
  g_term_opname = rule "term_opname" $
    (TeName . Text.pack <$>) $
    (symG `minus`) $
    Fun.const
     <$> g_term_keysyms
     <*. (any `but` g_term_opname_ok)
    where
    symG = some $ cf_of_Terminal g_term_opname_ok
  -- | Characters allowed in operator names:
  -- symbols, punctuation and marks, minus grouping characters.
  g_term_opname_ok :: Terminal g Char
  g_term_opname_ok = rule "term_opname_ok" $
    choice (unicat <$>
     [ Unicat_Symbol
     , Unicat_Punctuation
     , Unicat_Mark
     ]) `but` koG
    where
    koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
  g_term_keysyms :: Reg rl g String
  g_term_keysyms = rule "term_keysyms" $
    choice $ string <$> ["\\", "->", "=", "@"]
deriving instance Gram_Name g => Gram_Name (CF g)
instance Gram_Name EBNF
instance Gram_Name RuleDef

-- * Class 'Gram_Term_Type'

-- | Grammar of type-annotated abstraction parameters.
class
  ( Alt g
  , Alter g
  , App g
  , Gram_CF g
  , Gram_Lexer g
  , Gram_Meta src g
  , Gram_Rule g
  , Gram_Terminal g
  , Gram_Name g
  , Gram_Type src g
  ) => Gram_Term_Type src g where
  -- | @(name : type)@
  g_term_abst_decl :: CF g (TeName, AST_Type src)
  g_term_abst_decl = rule "term_abst_decl" $
    parens $ (,)
     <$> g_term_name
     <*  symbol ":"
     <*> g_type
deriving instance Gram_Term_Type src g => Gram_Term_Type src (CF g)
instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src EBNF
instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src RuleDef

-- * Class 'Gram_Error'

-- | Lift a grammar producing 'Either' an 'Error_Term_Gram'
-- into one that fails on 'Left'.
class Gram_Error g where
  errorG :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
  errorG (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
  errorG (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Type 'Error_Term_Gram'

-- | Errors possibly raised while parsing a term.
data Error_Term_Gram
  = Error_Term_Gram_Fixity Error_Fixity
  | Error_Term_Gram_Term_incomplete
  | Error_Term_Gram_Type_applied_to_nothing
  | Error_Term_Gram_not_applicable
  | Error_Term_Gram_application
  | Error_Term_Gram_application_mismatch
  deriving (Eq, Show)

-- * Class 'Gram_Term'

-- | Grammar of terms.
class
  ( Alt g
  , Alter g
  , App g
  , Gram_CF g
  , Gram_Lexer g
  , Gram_Meta src g
  , Gram_Rule g
  , Gram_Terminal g
  , Gram_Error g
  , Gram_Term_AtomsR src ss ss g
  , Gram_Name g
  , Gram_Term_Type src g
  , Gram_Type src g
  , Inj_Token ss (->)
  ) => Gram_Term ss src g where
  -- | Read access to the 'Tokenizers' threaded through the grammar state.
  tokenizers_get :: CF g (Tokenizers src ss -> a) -> CF g a
  -- | Write access to the 'Tokenizers' threaded through the grammar state.
  tokenizers_put :: CF g (Tokenizers src ss, a) -> CF g a
  g_term :: Inj_Token ss (->) => CF g (AST_Term src ss)
  g_term = rule "term" $
    choice
     [ try g_term_abst
     , g_term_operators
     , g_term_let
     ]
  -- | Atoms combined by prefix, infix and postfix operators,
  -- resolved according to their declared fixities.
  g_term_operators :: Inj_Token ss (->) => CF g (AST_Term src ss)
  g_term_operators = rule "term_operators" $
    errorG $
    left Error_Term_Gram_Fixity <$> g_ops
    where
    g_ops :: CF g (Either Error_Fixity (AST_Term src ss))
    g_ops = operators g_term_atom g_prefix g_infix g_postfix
    g_prefix  :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
    g_infix   :: CF g (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
    g_postfix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
    g_prefix  = errorG $ metaG $ tokenizers_get $ op_prefix  <$> g_prefix_op
    g_infix   = errorG $ metaG $ tokenizers_get $ op_infix   <$> g_infix_op
    g_postfix = errorG $ metaG $ tokenizers_get $ op_postfix <$> g_postfix_op
    -- Resolve an operator name in infix position.
    op_infix
      :: Mod TeName
      -> Tokenizers src ss
      -> src
      -> Either Error_Term_Gram
                (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
    op_infix name toks src = do
      let (_pre, in_, _post) = tokenizer_lookup name toks
      case in_ of
       Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
       Just p -> Right $ (tokenizer_fixity p,) $ \a b ->
         (BinTree0 (tokenizer p src) `BinTree1` a) `BinTree1` b
    -- Resolve an operator name in prefix (resp. postfix) position.
    op_prefix, op_postfix
      :: Mod TeName
      -> Tokenizers src ss
      -> src
      -> Either Error_Term_Gram
                ( Unifix
                , AST_Term src ss -> AST_Term src ss )
    op_prefix name toks src = do
      let (pre, _in_, _post) = tokenizer_lookup name toks
      case pre of
       Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
       Just p -> Right $ (tokenizer_fixity p,) $ \a ->
         BinTree0 (tokenizer p src) `BinTree1` a
    op_postfix name toks src = do
      let (_pre, _in_, post) = tokenizer_lookup name toks
      case post of
       Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
       Just p -> Right $ (tokenizer_fixity p,) $ \a ->
         BinTree0 (tokenizer p src) `BinTree1` a
  -- | A backquoted identifier used postfix, or a symbolic operator.
  g_postfix_op :: CF g (Mod TeName)
  g_postfix_op = rule "term_op_postfix" $
    lexeme $
     g_backquote *> g_term_mod_idname <+>
     -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
     g_term_mod_opname
  -- | A backquoted identifier, a whitespace-terminated operator,
  -- or bare juxtaposition (the @\" \"@ pseudo-operator).
  g_infix_op :: CF g (Mod TeName)
  g_infix_op = rule "term_op_infix" $
    lexeme $
     between g_backquote g_backquote g_term_mod_idname <+>
     try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
     pure (Mod [] " ")
  g_prefix_op :: CF g (Mod TeName)
  g_prefix_op = rule "term_op_prefix" $
    lexeme $
     g_term_mod_idname <* g_backquote <+>
     g_term_mod_opname
  g_backquote :: Gram_Terminal g' => g' Char
  g_backquote = char '`'
  -- | An atomic term: a symantic-provided atom, a (possibly infix-capable)
  -- name, or a parenthesized term.
  g_term_atom :: Inj_Token ss (->) => CF g (AST_Term src ss)
  g_term_atom = rule "term_atom" $
    choice $
    {-(try ( metaG $
     (\typ src -> BinTree0 $ inj_EToken src $ Token_Term_Type typ)
      <$ char '@' <*> g_type) :) $-}
    (try <$> gs_term_atomsR (Proxy @ss)) <>
    [ try $
      errorG $
      metaG $
      tokenizers_get $
      (\mn toks src -> do
        let (_, in_, _) = tokenizer_lookup mn toks
        case in_ of
         Just p -> Right $ BinTree0 $ tokenizer p src
         Nothing ->
          case mn of
           -- An unqualified, unknown name is a variable occurrence.
           [] `Mod` n -> Right $ BinTree0 $ Token_Term_Var src n
           _ -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
      ) <$> g_term_mod_name
    , g_term_group
    ]
  g_term_group :: Inj_Token ss (->) => CF g (AST_Term src ss)
  g_term_group = rule "term_group" $ parens g_term
  -- | @\\(x : t) … -> body@, desugared to nested 'Token_Term_Abst'.
  g_term_abst :: Inj_Token ss (->) => CF g (AST_Term src ss)
  g_term_abst = rule "term_abst" $
    metaG $
    ((\(xs, te) src ->
      foldr (\(x, ty_x) ->
       BinTree0 . Token_Term_Abst src x ty_x) te xs) <$>) $
    g_term_abst_args_body
     (symbol "\\" *> some g_term_abst_decl <* symbol "->")
     g_term
  -- | Parse the arguments then the body of an abstraction, temporarily
  -- shadowing the tokenizers of the bound names while the body is parsed
  -- (they become plain variables), and restoring them afterwards.
  g_term_abst_args_body
    :: CF g [(TeName, AST_Type src)]
    -> CF g (AST_Term src ss)
    -> CF g ([(TeName, AST_Type src)], AST_Term src ss)
  -- g_term_abst_args_body args body = (,) <$> args <*> body
  g_term_abst_args_body cf_args cf_body =
    tokenizers_put $ tokenizers_get $
     (\a b (toks::Tokenizers src ss) -> (toks, (a, b)))
      <$> (tokenizers_put $ tokenizers_get $
       (\args (toks::Tokenizers src ss) ->
        (,args) Tokenizers
         { tokenizers_prefix  = del (tokenizers_prefix  toks) args
         , tokenizers_infix   = ins (tokenizers_infix   toks) args
         , tokenizers_postfix = del (tokenizers_postfix toks) args
         }) <$> cf_args)
      <*> cf_body
    where
    -- Remove the bound names from the unqualified namespace.
    del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
    -- Rebind the bound names as variable tokenizers.
    ins = foldr $ \(n, _) ->
      Map.insertWith (<>) [] $
      Map.singleton n
       Tokenizer
        { tokenizer = \src -> Token_Term_Var src n
        , tokenizer_fixity = infixN5
        }
  -- | @let name args = bound in body@, with @args@ desugared
  -- to abstractions over the bound term.
  g_term_let :: Inj_Token ss (->) => CF g (AST_Term src ss)
  g_term_let = rule "term_let" $
    metaG $
    (\name args bound body src ->
      BinTree0 $
      Token_Term_Let src name
       (foldr (\(x, ty_x) ->
        BinTree0 . Token_Term_Abst src x ty_x) bound args)
       body)
     <$  symbol "let"
     <*> g_term_name
     <*> many g_term_abst_decl
     <*  symbol "="
     <*> g_term
     <*  symbol "in"
     <*> g_term
deriving instance
  ( Gram_Term ss src g
  , Gram_Term_AtomsR src ss ss (CF g)
  ) => Gram_Term ss src (CF g)
instance
  ( Gram_Term_AtomsR src ss ss EBNF
  , Inj_Token ss (->)
  , Inj_Source (Text_of_Source src) src
  ) => Gram_Term ss src EBNF where
  tokenizers_get (CF (EBNF g)) = CF $ EBNF g
  tokenizers_put (CF (EBNF g)) = CF $ EBNF g
instance
  ( Gram_Term_AtomsR src ss ss RuleDef
  , Inj_Token ss (->)
  , Inj_Source (Text_of_Source src) src
  ) => Gram_Term ss src RuleDef where
  tokenizers_get (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
  tokenizers_put (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Class 'Gram_Term_AtomsR'

-- | Fold 'gs_term_atomsT' over the type-level list @rs@.
class Gram_Term_AtomsR src (ss::[*]) (rs::[*]) g where
  gs_term_atomsR :: Proxy rs -> [CF g (AST_Term src ss)]
instance Gram_Term_AtomsR src ss '[] g where
  gs_term_atomsR _rs = []
instance
  ( Gram_Term_AtomsT src ss t g
  , Gram_Term_AtomsR src ss rs g
  ) => Gram_Term_AtomsR src ss (t ': rs) g where
  gs_term_atomsR _ =
    gs_term_atomsT (Proxy @t) <>
    gs_term_atomsR (Proxy @rs)

-- ** Class 'Gram_Term_AtomsT'

-- | The atom grammars contributed by one symantic @t@ (none by default).
class Gram_Term_AtomsT src ss t g where
  gs_term_atomsT :: Proxy t -> [CF g (AST_Term src ss)]
  gs_term_atomsT _t = []
-- instance Gram_Term_AtomsT src ss t RuleDef

-- | All the term grammar productions, with their results discarded;
-- useful to render the grammar (e.g. as EBNF).
gram_term
  :: forall g.
  ( Gram_Term '[Proxy (->), Proxy Integer] () g
  ) => [CF g ()]
gram_term =
  [ voiD g_term
  , voiD g_term_operators
  , voiD g_term_atom
  , voiD g_term_group
  , voiD g_term_abst
  , void (g_term_abst_decl::CF g (TeName, AST_Type ()))
  , voiD g_term_let
  , void g_term_mod_name
  , void g_term_name
  , void g_term_idname
  , void $ cf_of_Terminal g_term_idname_tail
  , void $ cf_of_Reg g_term_keywords
  , void g_term_mod_opname
  , void g_term_opname
  , void $ cf_of_Terminal g_term_opname_ok
  , void $ cf_of_Reg g_term_keysyms
  ]
  where
  voiD :: CF g (AST_Term () '[Proxy (->), Proxy Integer]) -> CF g ()
  voiD = (() <$)