{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.Symantic.Compiling.Term.Grammar where

import Control.Arrow (left)
import Control.Monad (void)
import Data.Map.Strict (Map)
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import Data.Text (Text)
import Prelude hiding (mod, not, any)
import qualified Data.Char as Char
import qualified Data.Function as Fun
import qualified Data.Map.Strict as Map
import qualified Data.Text as Text

import Language.Symantic.Parsing
import Language.Symantic.Typing

-- * Type 'TeName'
newtype TeName = TeName Text
 deriving (Eq, Ord, Show)
instance IsString TeName where
  fromString = TeName . fromString

-- * Type 'Mod'
type Mod_Path = [Mod_Name]
newtype Mod_Name = Mod_Name Text
 deriving (Eq, Ord, Show)
data Mod a = Mod Mod_Path a
 deriving (Eq, Functor, Ord, Show)

-- * Type 'AST_Term'
type AST_Term src ts = BinTree (EToken src ts)

-- NOTE: Declared here rather than in @Lib.Lambda@ to be able to use them here.
data instance TokenT src (ts::[*]) (Proxy (->))
 = Token_Term_Abst TeName (AST_Type src) (AST_Term src ts)
 | Token_Term_App
 | Token_Term_Let TeName (AST_Term src ts) (AST_Term src ts)
 | Token_Term_Var TeName
 | Token_Term_Type (AST_Type src)
 | Token_Term_Compose

{-
(~>) :: Inj_TyConst cs (->) => Type cs a -> Type cs b -> Type cs (a -> b)
(~>) a b = ty @(->) :$ a :$ b
infixr 5 ~>

-- (.) :: (b -> c) -> (a -> b) -> a -> c
ty_compose :: Inj_TyConsts cs [Proxy (->), Proxy Monad] => Type cs (Any::Void)
ty_compose =
  TyPi SKiType $ \a ->
  TyPi SKiType $ \b ->
  TyPi SKiType $ \c ->
  tyVoid $
  (b ~> c) ~> (a ~> b) ~> a ~> c

-- ($) :: (a -> b) -> a -> b
ty_app :: Inj_TyConsts cs [Proxy (->), Proxy Monad] => Type cs (Any::Void)
ty_app =
  TyPi SKiType $ \a ->
  TyPi SKiType $ \b ->
  tyVoid $
  (a ~> b) ~> a ~> b
-}

-- * Class 'Tokenize'
type Tokenize src ts = TokenizeR src ts ts

-- ** Type 'Tokenizer'
data Tokenizer fixy src ts = Tokenizer
  { tokenizer        :: src -> EToken src ts
  , tokenizer_fixity :: fixy
  }

-- ** Type 'Tokenizers'
data Tokenizers src ts = Tokenizers
  { tokenizers_prefix  :: Map Mod_Path (Map TeName (Tokenizer Unifix src ts))
  , tokenizers_infix   :: Map Mod_Path (Map TeName (Tokenizer Infix  src ts))
  , tokenizers_postfix :: Map Mod_Path (Map TeName (Tokenizer Unifix src ts))
  }
deriving instance
 ( Show (Tokenizer Unifix src ts)
 , Show (Tokenizer Infix  src ts)
 ) => Show (Tokenizers src ts)

instance Semigroup (Tokenizers src ts) where
  x <> y =
    Tokenizers
     (Map.unionWith Map.union (tokenizers_prefix  x) (tokenizers_prefix  y))
     (Map.unionWith Map.union (tokenizers_infix   x) (tokenizers_infix   y))
     (Map.unionWith Map.union (tokenizers_postfix x) (tokenizers_postfix y))
instance Monoid (Tokenizers src ts) where
  mempty  = Tokenizers Map.empty Map.empty Map.empty
  mappend = (<>)

tokenizers :: forall src ts. Tokenize src ts => Tokenizers src ts
tokenizers = tokenizeR (Proxy @ts)

{-
unProTok
 :: ProTok src ts
 -> Either Error_Term_Gram (EToken src ts)
unProTok (ProTokTe t) = Right t
unProTok (ProTokTy _) = Left $ Error_Term_Gram_Type_applied_to_nothing
unProTok _tok = Left $ Error_Term_Gram_Term_incomplete
-}
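-- NOTE: the following helper is a sketch added for illustration; it is not
-- part of the original API. It merges the 'Tokenizers' of several libraries.
-- Because the 'Semigroup' instance above relies on the left-biased
-- 'Map.union', a name defined by an earlier library in the list shadows the
-- same name defined by a later one within the same 'Mod_Path'.
tokenizers_of_libs :: [Tokenizers src ts] -> Tokenizers src ts
tokenizers_of_libs = foldr (<>) mempty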
-- | Look up the given 'Mod' 'TeName' in the given 'Tokenizers',
-- returning the matching 'Tokenizer's, if any,
-- for the prefix, infix and postfix positions.
tokenizer_lookup ::
 Inj_Token src ts (->) =>
 Mod TeName ->
 Tokenizers src ts ->
 ( Maybe (Tokenizer Unifix src ts)
 , Maybe (Tokenizer Infix  src ts)
 , Maybe (Tokenizer Unifix src ts)
 )
tokenizer_lookup mn@(mod `Mod` n) (Tokenizers pres ins posts) =
  let pre  = Map.lookup mod pres  >>= Map.lookup n
      post = Map.lookup mod posts >>= Map.lookup n
      in_  =
        case mn of
          -- The space-named infix is the implicit application operator.
          [] `Mod` " " -> Just Tokenizer
            { tokenizer        = \src -> inj_EToken src Token_Term_App
            , tokenizer_fixity = Infix (Just AssocL) 9
            }
          _ -> Map.lookup mod ins >>= Map.lookup n
  in (pre, in_, post)

{-
-- | Apply a proto-token to another.
--
-- This is required because polymorphic types are not implemented (yet),
-- therefore tokens for polymorphic types must have enough 'EToken's
-- to make them monomorphic types.
protok_app
 :: Inj_Token src ts (->)
 => ProTok src ts
 -> ProTok src ts
 -> Either Error_Term_Gram (ProTok src ts)
protok_app (ProTokLam f) (ProTokTe a) = Right $ f a
protok_app (ProTokPi  f) (ProTokTy a) = Right $ f a
protok_app (ProTokTe  f) (ProTokTe a) =
  Right $ ProTokTe $ inj_EToken (meta_of f) $ -- TODO: merge (meta_of a)
    f `Token_Term_App` a
protok_app ProTokApp f@ProTokLam{} = Right f
protok_app ProTokApp f@ProTokPi{}  = Right f
protok_app ProTokApp (ProTokTe f) =
  Right $ ProTokLam $ \a ->
    ProTokTe $ inj_EToken (meta_of f) $ -- TODO: merge (meta_of a)
      f `Token_Term_App` a
protok_app ProTokLam{} _ = Left $ Error_Term_Gram_application_mismatch
protok_app ProTokPi{}  _ = Left $ Error_Term_Gram_application_mismatch
protok_app ProTokTe{}  _ = Left $ Error_Term_Gram_not_applicable
protok_app ProTokTy{}  _ = Left $ Error_Term_Gram_not_applicable
protok_app ProTokApp{} _ = Left $ Error_Term_Gram_application
-}

-- ** Class 'TokenizeR'
class TokenizeR src (ts::[*]) (rs::[*]) where
  tokenizeR :: Proxy rs -> Tokenizers src ts
instance TokenizeR src ts '[] where
  tokenizeR _rs = mempty
instance
 ( TokenizeT src ts t
 , TokenizeR src ts rs
 ) => TokenizeR src ts (t ': rs) where
  tokenizeR _ = tokenizeR (Proxy @rs) <> tokenizeT (Proxy @t)

-- ** Class 'TokenizeT'
class TokenizeT src ts t where
  tokenizeT :: Proxy t -> Tokenizers src ts
  -- tokenizeT _t = [] `Mod` []
  tokenizeT _t = mempty

tokenizeTMod ::
 Mod_Path ->
 [(TeName, Tokenizer fix src ts)] ->
 Map Mod_Path (Map TeName (Tokenizer fix src ts))
tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl

tokenize0 ::
 Inj_Token src ts t =>
 Text -> fixity -> TokenT src ts (Proxy t) ->
 (TeName, Tokenizer fixity src ts)
tokenize0 n tokenizer_fixity tok =
  (TeName n,) Tokenizer
    { tokenizer = \src -> inj_EToken src tok
    , tokenizer_fixity
    }

{-
tokenize1 ::
 Inj_Token src ts t =>
 Text -> fixity ->
 (EToken src ts -> TokenT src ts (Proxy t)) ->
 (TeName, Tokenizer fixity src ts)
tokenize1 n tokenizer_fixity tok =
  (TeName n,) Tokenizer
    { tokenizer = \src ->
        ProTokLam $ \a ->
        ProTokTe $ inj_EToken src $ tok a
    , tokenizer_fixity
    }

tokenize2 ::
 Inj_Token src ts t =>
 Text -> fixity ->
 (EToken src ts -> EToken src ts -> TokenT src ts (Proxy t)) ->
 (TeName, Tokenizer fixity src ts)
tokenize2 n tokenizer_fixity tok =
  (TeName n,) Tokenizer
    { tokenizer = \src ->
        ProTokLam $ \a ->
        ProTokLam $ \b ->
        ProTokTe $ inj_EToken src $ tok a b
    , tokenizer_fixity
    }

tokenize3 ::
 Inj_Token src ts t =>
 Text -> fixity ->
 (EToken src ts -> EToken src ts -> EToken src ts -> TokenT src ts (Proxy t)) ->
 (TeName, Tokenizer fixity src ts)
tokenize3 n tokenizer_fixity tok =
  (TeName n,) Tokenizer
    { tokenizer = \src ->
        ProTokLam $ \a ->
        ProTokLam $ \b ->
        ProTokLam $ \c ->
        ProTokTe $ inj_EToken src $ tok a b c
    , tokenizer_fixity
    }
-}
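-- NOTE: illustration only, not part of the original module: a 'TokenizeT'
-- instance typically registers its tokens with 'tokenizeTMod' and the
-- 'tokenize0' helper. Assuming a hypothetical token @Token_Term_Not@ of a
-- hypothetical @Lib.Bool@, and assuming 'Unifix' has a @Prefix@ constructor
-- carrying a precedence (as 'Infix' carries one in 'tokenizer_lookup'),
-- such an instance could look like:
--
-- @
-- instance Inj_Token src ts Bool => TokenizeT src ts (Proxy Bool) where
--   tokenizeT _t = mempty
--     { tokenizers_prefix = tokenizeTMod [Mod_Name "Bool"]
--         [ tokenize0 "not" (Prefix 9) Token_Term_Not ]
--     }
-- @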
-- * Class 'Gram_Name'
class
 ( Alt g
 , Alter g
 , App g
 , Try g
 , Gram_CF g
 , Gram_Op g
 , Gram_Lexer g
 , Gram_RegL g
 , Gram_Rule g
 , Gram_Terminal g
 ) => Gram_Name g where
  g_mod_path :: CF g Mod_Path
  g_mod_path = rule "mod_path" $
    infixrG (pure <$> g_mod_name) (op <$ char '.')
    where op = (<>)
  g_mod_name :: CF g Mod_Name
  g_mod_name = rule "mod_name" $
    (Mod_Name . Text.pack <$>) $
    (identG `minus`) $
    Fun.const <$> g_term_keywords <*. (any `but` g_term_idname_tail)
    where
    identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
    headG  = unicat $ Unicat Char.UppercaseLetter
  g_term_mod_name :: CF g (Mod TeName)
  g_term_mod_name = rule "term_mod_name" $
    lexeme $
      g_term_mod_idname <+>
      parens g_term_mod_opname
  g_term_name :: CF g TeName
  g_term_name = rule "term_name" $
    lexeme $
      g_term_idname <+>
      parens g_term_opname
  g_term_mod_idname :: CF g (Mod TeName)
  g_term_mod_idname = rule "term_mod_idname" $
    Mod
     <$> option [] (try $ g_mod_path <* char '.')
     <*> g_term_idname
  g_term_idname :: CF g TeName
  g_term_idname = rule "term_idname" $
    (TeName . Text.pack <$>) $
    (identG `minus`) $
    Fun.const <$> g_term_keywords <*. (any `but` g_term_idname_tail)
    where
    identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
    headG  = unicat Unicat_Letter
  g_term_idname_tail :: Terminal g Char
  g_term_idname_tail = rule "term_idname_tail" $
    unicat Unicat_Letter <+>
    unicat Unicat_Number
  g_term_keywords :: Reg rl g String
  g_term_keywords = rule "term_keywords" $
    choice $ string <$> ["in", "let"]
  g_term_mod_opname :: CF g (Mod TeName)
  g_term_mod_opname = rule "term_mod_opname" $
    Mod
     <$> option [] (try $ g_mod_path <* char '.')
     <*> g_term_opname
  g_term_opname :: CF g TeName
  g_term_opname = rule "term_opname" $
    (TeName . Text.pack <$>) $
    (symG `minus`) $
    Fun.const <$> g_term_keysyms <*. (any `but` g_term_opname_ok)
    where
    symG = some $ cf_of_Terminal g_term_opname_ok
  g_term_opname_ok :: Terminal g Char
  g_term_opname_ok = rule "term_opname_ok" $
    choice (unicat <$>
     [ Unicat_Symbol
     , Unicat_Punctuation
     , Unicat_Mark
     ]) `but` koG
    where
    koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
  g_term_keysyms :: Reg rl g String
  g_term_keysyms = rule "term_keysyms" $
    choice $ string <$> ["\\", "->", "=", "@"]
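-- NOTE: illustration, not part of the original module: 'g_term_mod_name'
-- accepts a possibly module-qualified identifier or a parenthesized
-- operator; on hypothetical inputs it would yield:
--
-- > "Foo.Bar.compose" ==> Mod [Mod_Name "Foo", Mod_Name "Bar"] (TeName "compose")
-- > "(.)"             ==> Mod [] (TeName ".")
-- > "not"             ==> Mod [] (TeName "not")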
deriving instance Gram_Name g => Gram_Name (CF g)
instance Gram_Name EBNF
instance Gram_Name RuleDef

-- * Class 'Gram_Term_Type'
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta src g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Name g
 , Gram_Type src g
 ) => Gram_Term_Type src g where
  g_term_abst_decl :: CF g (TeName, AST_Type src)
  g_term_abst_decl = rule "term_abst_decl" $
    parens $ (,)
     <$> g_term_name
     <*  symbol ":"
     <*> g_type
deriving instance Gram_Term_Type src g => Gram_Term_Type src (CF g)
instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src EBNF
instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src RuleDef

-- * Class 'Gram_Error'
class Gram_Error g where
  errorG :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
  errorG (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
  errorG (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Type 'Error_Term_Gram'
data Error_Term_Gram
 = Error_Term_Gram_Fixity Error_Fixity
 | Error_Term_Gram_Term_incomplete
 | Error_Term_Gram_Type_applied_to_nothing
 | Error_Term_Gram_not_applicable
 | Error_Term_Gram_application
 | Error_Term_Gram_application_mismatch
 deriving (Eq, Show)
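-- NOTE: illustration, not part of the original module: 'errorG' turns a
-- production returning @Either Error_Term_Gram a@ into one returning @a@,
-- so a grammar rule can fail with a domain error rather than a plain parse
-- error. 'g_term_operators' below uses exactly this pattern:
--
-- > errorG $ left Error_Term_Gram_Fixity <$> g_ops
--
-- where @g_ops@ yields @Either Error_Fixity (AST_Term src ts)@ and 'left'
-- wraps the fixity error into 'Error_Term_Gram_Fixity'.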
-- * Class 'Gram_Term'
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta src g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Error g
 , Gram_Term_AtomsR src ts ts g
 , Gram_Name g
 , Gram_Term_Type src g
 , Gram_Type src g
 , Inj_Token src ts (->)
 ) => Gram_Term ts src g where
  tokenizers_get :: CF g (Tokenizers src ts -> a) -> CF g a
  tokenizers_put :: CF g (Tokenizers src ts, a) -> CF g a

  g_term :: Inj_Token src ts (->) => CF g (AST_Term src ts)
  g_term = rule "term" $
    choice
     [ try g_term_abst
     , g_term_operators
     , g_term_let
     ]
  g_term_operators :: Inj_Token src ts (->) => CF g (AST_Term src ts)
  g_term_operators = rule "term_operators" $
    errorG $
    left Error_Term_Gram_Fixity <$> g_ops
    where
    g_ops :: CF g (Either Error_Fixity (AST_Term src ts))
    g_ops = operators g_term_atom g_prefix g_infix g_postfix

    g_prefix  :: CF g (Unifix, AST_Term src ts -> AST_Term src ts)
    g_infix   :: CF g (Infix,  AST_Term src ts -> AST_Term src ts -> AST_Term src ts)
    g_postfix :: CF g (Unifix, AST_Term src ts -> AST_Term src ts)
    g_prefix  = errorG $ metaG $ tokenizers_get $ op_prefix  <$> g_prefix_op
    g_infix   = errorG $ metaG $ tokenizers_get $ op_infix   <$> g_infix_op
    g_postfix = errorG $ metaG $ tokenizers_get $ op_postfix <$> g_postfix_op

    op_infix ::
     Mod TeName ->
     Tokenizers src ts ->
     src ->
     Either Error_Term_Gram
      (Infix, AST_Term src ts -> AST_Term src ts -> AST_Term src ts)
    op_infix name toks src =
      let (_pre, in_, _post) = tokenizer_lookup name toks in
      case in_ of
        Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
        Just p -> Right $ (tokenizer_fixity p,) $ \a b ->
          (BinTree0 (tokenizer p src) `BinTree1` a) `BinTree1` b

    op_prefix, op_postfix ::
     Mod TeName ->
     Tokenizers src ts ->
     src ->
     Either Error_Term_Gram
      (Unifix, AST_Term src ts -> AST_Term src ts)
    op_prefix name toks src =
      let (pre, _in_, _post) = tokenizer_lookup name toks in
      case pre of
        Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
        Just p -> Right $ (tokenizer_fixity p,) $ \a ->
          BinTree0 (tokenizer p src) `BinTree1` a
    op_postfix name toks src =
      let (_pre, _in_, post) = tokenizer_lookup name toks in
      case post of
        Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
        Just p -> Right $ (tokenizer_fixity p,) $ \a ->
          BinTree0 (tokenizer p src) `BinTree1` a
  g_postfix_op :: CF g (Mod TeName)
  g_postfix_op = rule "term_op_postfix" $
    lexeme $
      g_backquote *> g_term_mod_idname <+>
      -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
      g_term_mod_opname
  g_infix_op :: CF g (Mod TeName)
  g_infix_op = rule "term_op_infix" $
    lexeme $
      between g_backquote g_backquote g_term_mod_idname <+>
      try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
      pure (Mod [] " ")
  g_prefix_op :: CF g (Mod TeName)
  g_prefix_op = rule "term_op_prefix" $
    lexeme $
      g_term_mod_idname <* g_backquote <+>
      g_term_mod_opname
  g_backquote :: Gram_Terminal g' => g' Char
  g_backquote = char '`'

  g_term_atom :: Inj_Token src ts (->) => CF g (AST_Term src ts)
  g_term_atom = rule "term_atom" $
    choice $
    (try (metaG $
      (\typ src -> BinTree0 $ inj_EToken src $ Token_Term_Type typ)
       <$ char '@' <*> g_type) :) $
    (try . (BinTree0 <$>) <$> gs_term_atomsR (Proxy @ts)) <>
    [ try $
        errorG $ metaG $ tokenizers_get $
        (\mn toks src ->
          let (_, in_, _) = tokenizer_lookup mn toks in
          case in_ of
            Just p -> Right $ BinTree0 $ tokenizer p src
            Nothing ->
              case mn of
                [] `Mod` n -> Right $ BinTree0 $ inj_EToken src $ Token_Term_Var n
                _ -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
        ) <$> g_term_mod_name
    , g_term_group
    ]
  g_term_group :: Inj_Token src ts (->) => CF g (AST_Term src ts)
  g_term_group = rule "term_group" $ parens g_term
  g_term_abst :: Inj_Token src ts (->) => CF g (AST_Term src ts)
  g_term_abst = rule "term_abst" $
    metaG $
    ((\(xs, te) src ->
      foldr (\(x, ty_x) ->
        BinTree0 . inj_EToken src . Token_Term_Abst x ty_x) te xs) <$>) $
    g_term_abst_args_body
     (symbol "\\" *> some g_term_abst_decl <* symbol "->")
     g_term
  g_term_abst_args_body ::
   CF g [(TeName, AST_Type src)] ->
   CF g (AST_Term src ts) ->
   CF g ([(TeName, AST_Type src)], AST_Term src ts)
  -- g_term_abst_args_body args body = (,) <$> args <*> body
  g_term_abst_args_body cf_args cf_body =
    tokenizers_put $ tokenizers_get $
      (\a b (toks::Tokenizers src ts) -> (toks, (a, b)))
       <$> (tokenizers_put $ tokenizers_get $
         (\args (toks::Tokenizers src ts) ->
           (,args) Tokenizers
            { tokenizers_prefix  = del (tokenizers_prefix  toks) args
            , tokenizers_infix   = ins (tokenizers_infix   toks) args
            , tokenizers_postfix = del (tokenizers_postfix toks) args
            }) <$> cf_args)
       <*> cf_body
    where
    -- Remove the lambda-bound names from the root module's prefix/postfix
    -- tables, and shadow them in the infix table with plain variable tokens.
    del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
    ins = foldr $ \(n, _) ->
      Map.insertWith (<>) [] $
      Map.singleton n Tokenizer
       { tokenizer = \src -> inj_EToken src $ Token_Term_Var n
       , tokenizer_fixity = infixN5
       }
  g_term_let :: Inj_Token src ts (->) => CF g (AST_Term src ts)
  g_term_let = rule "term_let" $
    metaG $
    (\name args bound body src ->
      BinTree0 $ inj_EToken src $
      Token_Term_Let name
       (foldr (\(x, ty_x) ->
         BinTree0 . inj_EToken src . Token_Term_Abst x ty_x) bound args)
       body)
     <$  symbol "let"
     <*> g_term_name
     <*> many g_term_abst_decl
     <*  symbol "="
     <*> g_term
     <*  symbol "in"
     <*> g_term
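-- NOTE: illustration, not part of the original module: juxtaposition is
-- parsed by 'g_infix_op' as the implicit infix @Mod [] " "@, which
-- 'tokenizer_lookup' maps to 'Token_Term_App'; per @op_infix@ above, an
-- application @f x@ therefore yields (source annotations elided):
--
-- > (BinTree0 Token_Term_App `BinTree1` BinTree0 (Token_Term_Var "f"))
-- >   `BinTree1` BinTree0 (Token_Term_Var "x")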
deriving instance
 ( Gram_Term ts src g
 , Gram_Term_AtomsR src ts ts (CF g)
 ) => Gram_Term ts src (CF g)
instance
 ( Gram_Term_AtomsR src ts ts EBNF
 , Inj_Token src ts (->)
 , Inj_Source (Text_of_Source src) src
 ) => Gram_Term ts src EBNF where
  tokenizers_get (CF (EBNF g)) = CF $ EBNF g
  tokenizers_put (CF (EBNF g)) = CF $ EBNF g
instance
 ( Gram_Term_AtomsR src ts ts RuleDef
 , Inj_Token src ts (->)
 , Inj_Source (Text_of_Source src) src
 ) => Gram_Term ts src RuleDef where
  tokenizers_get (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
  tokenizers_put (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Class 'Gram_Term_AtomsR'
class Gram_Term_AtomsR src (ts::[*]) (rs::[*]) g where
  gs_term_atomsR :: Proxy rs -> [CF g (EToken src ts)]
instance Gram_Term_AtomsR src ts '[] g where
  gs_term_atomsR _rs = []
instance
 ( Gram_Term_AtomsT src ts t g
 , Gram_Term_AtomsR src ts rs g
 ) => Gram_Term_AtomsR src ts (t ': rs) g where
  gs_term_atomsR _ =
    gs_term_atomsT (Proxy @t) <>
    gs_term_atomsR (Proxy @rs)

-- ** Class 'Gram_Term_AtomsT'
class Gram_Term_AtomsT src ts t g where
  gs_term_atomsT :: Proxy t -> [CF g (EToken src ts)]
  gs_term_atomsT _t = []
-- instance Gram_Term_AtomsT src ts t RuleDef

gram_term ::
 forall g.
 Gram_Term '[Proxy (->), Proxy Integer] () g =>
 [CF g ()]
gram_term =
 [ voiD g_term
 , voiD g_term_operators
 , voiD g_term_atom
 , voiD g_term_group
 , voiD g_term_abst
 , void (g_term_abst_decl::CF g (TeName, AST_Type ()))
 , voiD g_term_let
 , void g_term_mod_name
 , void g_term_name
 , void g_term_idname
 , void $ cf_of_Terminal g_term_idname_tail
 , void $ cf_of_Reg g_term_keywords
 , void g_term_mod_opname
 , void g_term_opname
 , void $ cf_of_Terminal g_term_opname_ok
 , void $ cf_of_Reg g_term_keysyms
 ]
 where
 voiD :: CF g (AST_Term () '[Proxy (->), Proxy Integer]) -> CF g ()
 voiD = (() <$)
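-- NOTE: a sketch, not part of the original module: 'gram_term' gathers every
-- rule of the term grammar at a monomorphic type so that a grammar
-- interpreter can traverse them, typically to render the grammar as EBNF.
-- Assuming Language.Symantic.Parsing provides such an interpreter for the
-- 'EBNF' type (named @renderEBNF@ here, which is an assumption), dumping the
-- grammar could look like:
--
-- > mapM_ (putStrLn . renderEBNF) (gram_term :: [CF EBNF ()])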