{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- | Grammar of terms: the tokenizing and parsing machinery
-- used by the compiling phase to turn concrete syntax into 'EToken's.
--
-- NOTE(review): the code below visibly relies on more extensions than the
-- pragmas above declare (@TypeApplications@ for @Proxy \@ts@,
-- @TupleSections@ for @(TeName n,)@, @NamedFieldPuns@ for @protok_fixity@,
-- @DataKinds@ for @'[...]@ lists) — presumably enabled project-wide via
-- cabal @default-extensions@; confirm before building this file standalone.
module Language.Symantic.Compiling.Term.Grammar where

import Control.Arrow (left)
import Control.Monad (foldM, void, (=<<))
import qualified Data.Char as Char
import qualified Data.Function as Fun
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import Data.Text (Text)
import qualified Data.Text as Text
import Prelude hiding (mod, not, any)

import Language.Symantic.Parsing
import Language.Symantic.Typing

-- * Type 'TeName'

-- | Name of a term: either an identifier or an operator name.
newtype TeName = TeName Text
  deriving (Eq, Ord, Show)
instance IsString TeName where
  fromString = TeName . fromString

-- * Type 'ProTok'

-- | Proto 'EToken'. It's almost like a free monad,
-- but has a third constructor ('ProTokPi')
-- to require a type argument.
--
-- NOTE: this type may one day be removed
-- if proper type inferencing is done.
-- In the meantime it is used to require
-- term or type arguments needed to build
-- the 'EToken's of polymorphic terms.
data ProTok meta ts
  = ProTokLam (EToken meta ts -> ProTok meta ts)
    -- ^ Require a term argument.
  | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
    -- ^ Require a type argument.
  | ProTok (EToken meta ts)
    -- ^ No need for any argument.

-- | Declared here and not in @Compiling.Lambda@
-- to be able to use 'Token_Term_Var' in 'protok'.
-- | Tokens of the core lambda-calculus constructs:
-- abstraction, application, @let@, variables and composition.
data instance TokenT meta (ts::[*]) (Proxy (->))
  = Token_Term_Abst TeName (EToken meta '[Proxy Token_Type]) (EToken meta ts)
  | Token_Term_App (EToken meta ts) (EToken meta ts)
  | Token_Term_Let TeName (EToken meta ts) (EToken meta ts)
  | Token_Term_Var TeName
  | Token_Term_Compose (EToken meta ts) (EToken meta ts)

-- * Class 'Tokenize'
type Tokenize meta ts = TokenizeR meta ts ts

-- ** Type 'Tokenizers'

-- | Tokenizers for prefix, infix and postfix terms,
-- indexed by module path, then by term name.
data Tokenizers meta ts = Tokenizers
  { tokenizers_prefix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
  , tokenizers_infix :: Map Mod_Path (Map TeName (ProTok_Term Infix meta ts))
  , tokenizers_postfix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
  }
instance Semigroup (Tokenizers meta ts) where
  -- Merge per module path; within a module the inner 'Map.union'
  -- is left-biased, so names from the left operand win on collision.
  x <> y = Tokenizers
    (Map.unionWith Map.union (tokenizers_prefix x) (tokenizers_prefix y))
    (Map.unionWith Map.union (tokenizers_infix x) (tokenizers_infix y))
    (Map.unionWith Map.union (tokenizers_postfix x) (tokenizers_postfix y))
instance Monoid (Tokenizers meta ts) where
  mempty = Tokenizers Map.empty Map.empty Map.empty
  mappend = (<>)

-- ** Type 'ProTok_Term'

-- | A 'ProTok' builder (given the parse metadata) paired with its fixity.
data ProTok_Term fixy meta ts = ProTok_Term
  { protok_term :: meta -> ProTok meta ts
  , protok_fixity :: fixy
  }

-- | Gather the 'Tokenizers' contributed by every token type in @ts@.
tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
tokenizers = tokenizeR (Proxy @ts)

-- | Extract the completed token, or fail if term/type arguments
-- are still required ('ProTokLam' / 'ProTokPi').
unProTok
  :: ProTok meta ts
  -> Either Error_Term_Gram (EToken meta ts)
unProTok (ProTok t) = Right t
unProTok _ = Left Error_Term_Gram_Term_incomplete

-- | Look up a qualified name in the given 'Tokenizers',
-- returning its optional prefix, mandatory infix and optional postfix entries.
-- An unqualified name with no infix entry falls back to a plain variable
-- token at fixity 'infixN5'; a qualified miss is an undefined-term error.
protok
  :: Inj_Token meta ts (->)
  => Mod TeName
  -> Tokenizers meta ts
  -> Either Error_Term_Gram
    ( Maybe (ProTok_Term Unifix meta ts)
    , ProTok_Term Infix meta ts
    , Maybe (ProTok_Term Unifix meta ts)
    )
protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
  let pre = Map.lookup mod pres >>= Map.lookup tn
  let post = Map.lookup mod posts >>= Map.lookup tn
  in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
  return (pre, in_, post)
  where
  var_or_err (Just x) = Right x
  var_or_err Nothing =
    case mod of
     [] -> Right $ var infixN5
     _ -> Left $ Error_Term_Gram_Undefined_term
  -- Fallback: treat the bare name as a term variable.
  var protok_fixity = ProTok_Term
    { protok_term = \meta -> ProTok $ inj_EToken meta $ Token_Term_Var tn
    , protok_fixity
    }

-- | Apply a 'ProTok' to a list of type ('Left') and term ('Right') arguments.
-- A completed 'ProTok' applied to a term becomes a 'Token_Term_App';
-- applying a type to anything but 'ProTokPi' (or a term to a completed
-- non-'ProTok' shape) is an error.
protok_app
  :: Inj_Token meta ts (->)
  => ProTok meta ts
  -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
  -> Either Error_Term_Gram (ProTok meta ts)
protok_app = foldM app
  where
  app acc (Left typ) =
    case acc of
     ProTokPi g -> Right $ g typ
     _ -> Left Error_Term_Gram_Cannot_apply_type
  app acc (Right te) =
    case acc of
     ProTokLam f -> Right $ f te
     ProTok tok@(EToken e) -> Right $ ProTok $
       inj_EToken (meta_of e) $ Token_Term_App tok te
     _ -> Left Error_Term_Gram_Cannot_apply_term

-- ** Class 'TokenizeR'

-- | Structural recursion over the type-level list @rs@,
-- accumulating each element's 'Tokenizers' with '(<>)'.
class TokenizeR meta (ts::[*]) (rs::[*]) where
  tokenizeR :: Proxy rs -> Tokenizers meta ts
instance TokenizeR meta ts '[] where
  tokenizeR _rs = mempty
instance
 ( TokenizeT meta ts t
 , TokenizeR meta ts rs
 ) => TokenizeR meta ts (t ': rs) where
  tokenizeR _ =
    tokenizeR (Proxy @rs) <>
    tokenizeT (Proxy @t)

-- ** Class 'TokenizeT'

-- | Per-token-type contribution of 'Tokenizers';
-- defaults to contributing nothing.
class TokenizeT meta ts t where
  tokenizeT :: Proxy t -> Tokenizers meta ts
  -- tokenizeT _t = [] `Mod` []
  tokenizeT _t = mempty

-- | Helper to build a one-module tokenizer table.
tokenizeTMod
  :: Mod_Path
  -> [(TeName, ProTok_Term fix meta ts)]
  -> Map Mod_Path (Map TeName (ProTok_Term fix meta ts))
tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl

-- | Tokenizer entry for a nullary token.
tokenize0
  :: Inj_Token meta ts t
  => Text -> fixity -> TokenT meta ts (Proxy t)
  -> (TeName, ProTok_Term fixity meta ts)
tokenize0 n protok_fixity tok =
  (TeName n,) ProTok_Term
    { protok_term = \meta -> ProTok $ inj_EToken meta $ tok
    , protok_fixity }

-- | Tokenizer entry for a token taking one term argument.
tokenize1
  :: Inj_Token meta ts t
  => Text -> fixity
  -> (EToken meta ts -> TokenT meta ts (Proxy t))
  -> (TeName, ProTok_Term fixity meta ts)
tokenize1 n protok_fixity tok =
  (TeName n,) ProTok_Term
    { protok_term = \meta ->
        ProTokLam $ \a ->
        ProTok $ inj_EToken meta $ tok a
    , protok_fixity }

-- | Tokenizer entry for a token taking two term arguments.
tokenize2
  :: Inj_Token meta ts t
  => Text -> fixity
  -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
  -> (TeName, ProTok_Term fixity meta ts)
tokenize2 n protok_fixity tok =
  (TeName n,) ProTok_Term
    { protok_term = \meta ->
        ProTokLam $ \a -> ProTokLam $ \b ->
        ProTok $ inj_EToken meta $ tok a b
    , protok_fixity }

-- | Tokenizer entry for a token taking three term arguments.
tokenize3
  :: Inj_Token meta ts t
  => Text -> fixity
  -> (EToken meta ts -> EToken meta ts -> EToken meta ts
      -> TokenT meta ts (Proxy t))
  -> (TeName, ProTok_Term fixity meta ts)
tokenize3 n protok_fixity tok =
  (TeName n,) ProTok_Term
    { protok_term = \meta ->
        ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
        ProTok $ inj_EToken meta $ tok a b c
    , protok_fixity }

-- * Type 'Mod'
type Mod_Path = [Mod_Name]
newtype Mod_Name = Mod_Name Text
  deriving (Eq, Ord, Show)
-- | A value qualified by a module path.
data Mod a = Mod Mod_Path a
  deriving (Eq, Functor, Ord, Show)

-- * Class 'Gram_Name'

-- | Grammar of module paths, term identifiers and operator names.
--
-- NOTE(review): @Alter g@ appears twice in this context; a duplicate
-- constraint is harmless to GHC but one occurrence can be removed.
class
 ( Alt g
 , Alter g
 , Alter g
 , App g
 , Try g
 , Gram_CF g
 , Gram_Op g
 , Gram_Lexer g
 , Gram_RegL g
 , Gram_Rule g
 , Gram_Terminal g
 ) => Gram_Name g where
  -- | Dot-separated non-empty sequence of module names.
  g_mod_path :: CF g Mod_Path
  g_mod_path = rule "mod_path" $
    infixrG
      (pure <$> g_mod_name)
      (op <$ char '.')
    where op = (<>)
  -- | A module name: uppercase-letter head, identifier tail,
  -- minus the term keywords.
  g_mod_name :: CF g Mod_Name
  g_mod_name = rule "mod_name" $
    (Mod_Name . Text.pack <$>) $
    identG `minus`
    (Fun.const
      <$> g_term_keywords
      <*. (any `but` g_term_idname_tail))
    where
    identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
    headG = unicat $ Unicat Char.UppercaseLetter
  -- | Qualified term name: identifier, or operator in parentheses.
  g_term_mod_name :: CF g (Mod TeName)
  g_term_mod_name = rule "term_mod_name" $
    lexeme $
      g_term_mod_idname <+>
      parens g_term_mod_opname
  -- | Unqualified term name: identifier, or operator in parentheses.
  g_term_name :: CF g TeName
  g_term_name = rule "term_name" $
    lexeme $
      g_term_idname <+>
      parens g_term_opname
  -- | Identifier with an optional module-path qualifier.
  g_term_mod_idname :: CF g (Mod TeName)
  g_term_mod_idname = rule "term_mod_idname" $
    Mod
      <$> option [] (try $ g_mod_path <* char '.')
      <*> g_term_idname
  -- | Term identifier: letter head, letter/number tail, minus keywords.
  g_term_idname :: CF g TeName
  g_term_idname = rule "term_idname" $
    (TeName . Text.pack <$>) $
    (identG `minus`) $
    Fun.const
      <$> g_term_keywords
      <*. (any `but` g_term_idname_tail)
    where
    identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
    headG = unicat $ Unicat_Letter
  g_term_idname_tail :: Terminal g Char
  g_term_idname_tail = rule "term_idname_tail" $
    unicat Unicat_Letter <+>
    unicat Unicat_Number
  -- | Reserved identifiers.
  g_term_keywords :: Reg rl g String
  g_term_keywords = rule "term_keywords" $
    choice $ string <$> ["in", "let"]
  -- | Operator name with an optional module-path qualifier.
  g_term_mod_opname :: CF g (Mod TeName)
  g_term_mod_opname = rule "term_mod_opname" $
    Mod
      <$> option [] (try $ g_mod_path <* char '.')
      <*> g_term_opname
  -- | Operator name: symbol characters, minus the reserved key-symbols.
  g_term_opname :: CF g TeName
  g_term_opname = rule "term_opname" $
    (TeName . Text.pack <$>) $
    (symG `minus`) $
    Fun.const
      <$> g_term_keysyms
      <*. (any `but` g_term_opname_ok)
    where
    symG = some $ cf_of_Terminal g_term_opname_ok
  -- | Characters allowed in an operator name:
  -- Unicode symbol/punctuation/mark, excluding grouping characters.
  g_term_opname_ok :: Terminal g Char
  g_term_opname_ok = rule "term_opname_ok" $
    choice (unicat <$>
      [ Unicat_Symbol
      , Unicat_Punctuation
      , Unicat_Mark
      ]) `but` koG
    where
    koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
  -- | Reserved operator symbols.
  g_term_keysyms :: Reg rl g String
  g_term_keysyms = rule "term_keysyms" $
    choice $ string <$> ["\\", "->", "="]
deriving instance Gram_Name g => Gram_Name (CF g)
instance Gram_Name EBNF
instance Gram_Name RuleDef

-- * Class 'Gram_Term_Type'

-- | Grammar of type annotations attached to term abstractions.
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta meta g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Name g
 , Gram_Type meta g
 ) => Gram_Term_Type meta g where
  -- | Parenthesized @(name : type)@ declaration of an abstraction argument.
  g_term_abst_decl :: CF g (TeName, TokType meta)
  g_term_abst_decl = rule "term_abst_decl" $
    parens $ (,)
      <$> g_term_name
      <* symbol ":"
      <*> g_type
deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
instance Gram_Term_Type meta EBNF
instance Gram_Term_Type meta RuleDef

-- * Class 'Gram_Error'

-- | Lift 'Error_Term_Gram' failures out of a grammar result.
class Gram_Error g where
  term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
  -- EBNF only renders the grammar, so the error layer is a no-op rewrap.
  term_unError (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
  term_unError (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Type 'Error_Term_Gram'

-- | Errors of the term grammar.
data Error_Term_Gram
  = Error_Term_Gram_Fixity Error_Fixity
  | Error_Term_Gram_Cannot_apply_term
  | Error_Term_Gram_Cannot_apply_type
  | Error_Term_Gram_Undefined_term
  | Error_Term_Gram_Term_incomplete
  deriving (Eq, Show)

-- * Class 'Gram_Term'

-- | Grammar of terms: abstraction, application, operators and @let@.
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta meta g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Error g
 , Gram_Term_AtomsR meta ts ts g
 , Gram_Name g
 , Gram_Term_Type meta g
 , Gram_Type meta g
 ) => Gram_Term ts meta g where
  -- | Wrap 'g_term_abst'. Useful to modify body's scope.
  g_term_abst_args_body
    :: CF g [(TeName, TokType meta)]
    -> CF g (EToken meta ts)
    -> CF g ([(TeName, TokType meta)], EToken meta ts)
  g_term_abst_args_body args body = (,) <$> args <*> body
  -- | Give the parser access to the 'Tokenizers' in scope.
  term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a
  g_term
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  g_term = rule "term" $
    choice
      [ try g_term_abst
      , g_term_operators
      , g_term_let
      ]
  -- | Operator expressions, resolved by fixity.
  -- The double 'term_unError' unwraps the two 'Either' layers produced
  -- below: the outer one from 'left'-mapped fixity errors of 'operators',
  -- the inner one from building the token itself.
  g_term_operators
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  g_term_operators = rule "term_operators" $
    term_unError $
    term_unError $
    left Error_Term_Gram_Fixity <$>
    operators
      (Right <$> g_term_app)
      (term_unError $ metaG $ term_tokenizers $ op_prefix <$> g_term_op_prefix)
      (term_unError $ metaG $ term_tokenizers $ op_infix <$> g_term_op_infix)
      (term_unError $ metaG $ term_tokenizers $ op_postfix <$> g_term_op_postfix)
    where
    -- NOTE(review): the bindings below are reconstructed as local to this
    -- 'where' clause — 'g_backquote' is only in scope here, which the
    -- 'g_term_op_*' rules rely on; confirm against the original layout.
    g_backquote :: Gram_Terminal g' => g' Char
    g_backquote = char '`'
    -- Resolve an infix operator name to its fixity and applier.
    op_infix name toks meta = do
      (_pre, in_, _post) <- protok name toks
      return $ (protok_fixity in_,) $ \ma mb -> do
        a <- ma
        b <- mb
        unProTok =<< protok_term in_ meta `protok_app` [Right a, Right b]
    op_prefix name toks meta = do
      (pre, _in_, _post) <- protok name toks
      case pre of
       Just p -> Right $ (protok_fixity p,) $ (=<<) $ \a ->
         unProTok =<< protok_term p meta `protok_app` [Right a]
       Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
    op_postfix name toks meta = do
      (_pre, _in_, post) <- protok name toks
      case post of
       Just p -> Right $ (protok_fixity p,) $ (=<<) $ \a ->
         unProTok =<< protok_term p meta `protok_app` [Right a]
       Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
    -- Postfix operator: @`name@ (backquoted identifier) or operator name.
    g_term_op_postfix :: CF g (Mod TeName)
    g_term_op_postfix = rule "term_op_postfix" $
      lexeme $
        g_backquote *> g_term_mod_idname <+>
        -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
        g_term_mod_opname
    -- Infix operator: @`name`@ (backquoted identifier) or operator name.
    g_term_op_infix :: CF g (Mod TeName)
    g_term_op_infix = rule "term_op_infix" $
      lexeme $
        between g_backquote g_backquote g_term_mod_idname <+>
        g_term_mod_opname
    -- Prefix operator: @name`@ (identifier then backquote) or operator name.
    g_term_op_prefix :: CF g (Mod TeName)
    g_term_op_prefix = rule "term_op_prefix" $
      lexeme $
        g_term_mod_idname <* g_backquote <+>
        g_term_mod_opname
  -- | Left-associated application of an atom to following atoms.
  g_term_app
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  g_term_app = rule "term_app" $
    term_unError $
    (\a as -> unProTok =<< protok_app a as)
      <$> g_term_atom_proto
      <*> many (try g_term_atom)
  -- | An argument atom: @\@type@ ('Left') or a term ('Right').
  g_term_atom
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (Either (EToken meta '[Proxy Token_Type]) (EToken meta ts))
  g_term_atom = rule "term_atom" $
    (Left <$ char '@' <*> g_type) <+>
    (Right <$> term_unError (unProTok <$> g_term_atom_proto))
  -- | An atom that may still await arguments: custom atoms contributed
  -- by @ts@, a named term, or a parenthesized term.
  g_term_atom_proto
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (ProTok meta ts)
  g_term_atom_proto =
    choice $
    try <$> gs_term_atomsR (Proxy @ts) <>
    [ try $
        metaG $
        ((\(_, in_, _) -> protok_term in_) <$>) $
        term_unError $
        term_tokenizers $
        protok <$> g_term_mod_name
    , ProTok <$> g_term_group
    ]
  g_term_group
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  g_term_group = rule "term_group" $ parens g_term
  -- | Lambda abstraction @\\(x:t) ... -> body@,
  -- folded into nested 'Token_Term_Abst'.
  g_term_abst
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  g_term_abst = rule "term_abst" $
    metaG $
    ((\(xs, te) meta ->
      foldr (\(x, ty_x) ->
        inj_EToken meta .
        Token_Term_Abst x ty_x) te xs) <$>) $
    g_term_abst_args_body
      (symbol "\\" *> some g_term_abst_decl <* symbol "->")
      g_term
  -- | @let name (x:t)... = bound in body@; the argument declarations
  -- are desugared into abstractions around the bound term.
  g_term_let
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  g_term_let = rule "term_let" $
    metaG $
    (\name args bound body meta ->
      inj_EToken meta $
      Token_Term_Let name
        (foldr (\(x, ty_x) ->
          inj_EToken meta .
          Token_Term_Abst x ty_x) bound args)
        body)
    <$ symbol "let"
    <*> g_term_name
    <*> many g_term_abst_decl
    <* symbol "="
    <*> g_term
    <* symbol "in"
    <*> g_term
deriving instance
 ( Gram_Term ts meta g
 , Gram_Term_AtomsR meta ts ts (CF g)
 ) => Gram_Term ts meta (CF g)
instance
 Gram_Term_AtomsR meta ts ts EBNF =>
 Gram_Term ts meta EBNF where
  term_tokenizers (CF (EBNF g)) = CF $ EBNF g
instance
 Gram_Term_AtomsR meta ts ts RuleDef =>
 Gram_Term ts meta RuleDef where
  term_tokenizers (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Class 'Gram_Term_AtomsR'

-- | Structural recursion over @rs@ collecting each token type's
-- extra atom grammars.
class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
  gs_term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
instance Gram_Term_AtomsR meta ts '[] g where
  gs_term_atomsR _rs = []
instance
 ( Gram_Term_AtomsT meta ts t g
 , Gram_Term_AtomsR meta ts rs g
 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
  gs_term_atomsR _ =
    gs_term_atomsT (Proxy @t) <>
    gs_term_atomsR (Proxy @rs)

-- ** Class 'Gram_Term_AtomsT'

-- | Per-token-type extra atom grammars; defaults to none.
class Gram_Term_AtomsT meta ts t g where
  gs_term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
  gs_term_atomsT _t = []
instance Gram_Term_AtomsT meta ts t RuleDef

-- | Every rule of the term grammar, with results erased to @()@ —
-- presumably used to render the grammar (e.g. as EBNF); confirm at call site.
gram_term
  :: forall g.
  ( Gram_Term '[Proxy (->), Proxy Integer] () g
  ) => [CF g ()]
gram_term =
  [ ue g_term
  , ue g_term_operators
  , ue g_term_app
  , ug g_term_atom
  , ue g_term_group
  , ue g_term_abst
  , void (g_term_abst_decl::CF g (TeName, TokType ()))
  , ue g_term_let
  , void g_term_mod_name
  , void g_term_name
  , void g_term_idname
  , void $ cf_of_Terminal g_term_idname_tail
  , void $ cf_of_Reg g_term_keywords
  , void g_term_mod_opname
  , void g_term_opname
  , void $ cf_of_Terminal g_term_opname_ok
  , void $ cf_of_Reg g_term_keysyms
  ]
  where
  ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
  ue = (() <$)
  -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
  -- uf = (() <$)
  ug :: CF g (Either (EToken () '[Proxy Token_Type])
              (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()
  ug = (() <$)