{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.Symantic.Compiling.Term.Grammar where

import Control.Arrow (left)
import Control.Monad (foldM, void, (=<<))
import qualified Data.Char as Char
import qualified Data.Function as Fun
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import Data.Text (Text)
import qualified Data.Text as Text
import Prelude hiding (mod, not, any)

import Language.Symantic.Parsing
import Language.Symantic.Typing

-- * Type 'TeName'
newtype TeName = TeName Text
 deriving (Eq, Ord, Show)
instance IsString TeName where
 fromString = TeName . fromString

-- * Type 'ProTok'
-- | Proto 'EToken'. It's almost like a free monad,
-- but with more constructors to avoid the need
-- for sub constructors when requiring a type rather than a term.
--
-- NOTE: this type may one day be removed
-- if proper polymorphic types are implemented.
-- In the meantime it is used to require
-- the term or type arguments needed to build
-- the 'EToken's of polymorphic terms.
data ProTok meta ts
 = ProTokLam (EToken meta ts -> ProTok meta ts)
   -- ^ Require a term argument.
 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
   -- ^ Require a type argument.
 | ProTokTe (EToken meta ts)
   -- ^ Immediate term, no need for any argument.
 | ProTokTy (EToken meta '[Proxy Token_Type])
   -- ^ Immediate type, no need for any argument.
 | ProTokApp
   -- ^ Require a term argument to perform a 'Token_Term_App'.
   --
   -- Useful to make the term application symbolized by a space
   -- be an operator, and thus be integrated with the other operators.
   -- This enables prefix and postfix operators to be applied
   -- before the term application.
instance (Show_Token meta ts, Show meta) => Show (ProTok meta ts) where
 showsPrec n ProTokLam{}    = showParen (n > 10) $ showString "ProTokLam"
 showsPrec n ProTokPi{}     = showParen (n > 10) $ showString "ProTokPi"
 showsPrec n (ProTokTe tok) = showParen (n > 10) $ showString "ProTokTe " . showsPrec 11 tok
 showsPrec n (ProTokTy typ) = showParen (n > 10) $ showString "ProTokTy " . showsPrec 11 typ
 showsPrec n ProTokApp      = showParen (n > 10) $ showString "ProTokApp"
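-- | A minimal sketch ('protok_sketch2' is a hypothetical helper,
-- not part of the library): the shape of a 'ProTok' awaiting two
-- term arguments, mirroring what 'tokenize2' builds further below.
-- 'protok_app' peels off one 'ProTokLam' per argument until an
-- immediate 'ProTokTe' term remains.
protok_sketch2
 :: Inj_Token meta ts (->)
 => meta -> ProTok meta ts
protok_sketch2 meta =
 ProTokLam $ \a ->
 ProTokLam $ \b ->
 ProTokTe $ inj_EToken meta $ a `Token_Term_App` b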
-- | Declared here and not in @Compiling.Lambda@
-- to be able to use 'Token_Term_Var'.
data instance TokenT meta (ts::[*]) (Proxy (->))
 = Token_Term_Abst TeName (EToken meta '[Proxy Token_Type]) (EToken meta ts)
 | Token_Term_App (EToken meta ts) (EToken meta ts)
 | Token_Term_Let TeName (EToken meta ts) (EToken meta ts)
 | Token_Term_Var TeName
 | Token_Term_Compose (EToken meta ts) (EToken meta ts)

-- * Class 'Tokenize'
type Tokenize meta ts = TokenizeR meta ts ts

-- ** Type 'Tokenizers'
data Tokenizers meta ts = Tokenizers
 { tokenizers_prefix  :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
 , tokenizers_infix   :: Map Mod_Path (Map TeName (ProTok_Term Infix  meta ts))
 , tokenizers_postfix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
 }
deriving instance
 ( Show (ProTok_Term Unifix meta ts)
 , Show (ProTok_Term Infix  meta ts)
 ) => Show (Tokenizers meta ts)
instance Semigroup (Tokenizers meta ts) where
 x <> y = Tokenizers
  (Map.unionWith Map.union (tokenizers_prefix  x) (tokenizers_prefix  y))
  (Map.unionWith Map.union (tokenizers_infix   x) (tokenizers_infix   y))
  (Map.unionWith Map.union (tokenizers_postfix x) (tokenizers_postfix y))
instance Monoid (Tokenizers meta ts) where
 mempty  = Tokenizers Map.empty Map.empty Map.empty
 mappend = (<>)

-- ** Type 'ProTok_Term'
data ProTok_Term fixy meta ts = ProTok_Term
 { protok_term   :: meta -> ProTok meta ts
 , protok_fixity :: fixy
 }

tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
tokenizers = tokenizeR (Proxy @ts)

unProTok
 :: ProTok meta ts
 -> Either Error_Term_Gram (EToken meta ts)
unProTok (ProTokTe t) = Right t
unProTok (ProTokTy _) = Left $ Error_Term_Gram_Type_applied_to_nothing
unProTok _tok         = Left $ Error_Term_Gram_Term_incomplete

-- | Look up the given 'Mod' 'TeName' in the given 'Tokenizers',
-- returning an error or a 'ProTok' constructor
-- for the prefix, infix and postfix positions.
protok_lookup
 :: Inj_Token meta ts (->)
 => Mod TeName
 -> Tokenizers meta ts
 -> Either Error_Term_Gram
     ( Maybe (ProTok_Term Unifix meta ts)
     , Maybe (ProTok_Term Infix  meta ts)
     , Maybe (ProTok_Term Unifix meta ts)
     )
protok_lookup mn@(mod `Mod` n) (Tokenizers pres ins posts) = do
 let pre  = Map.lookup mod pres  >>= Map.lookup n
 let post = Map.lookup mod posts >>= Map.lookup n
 let in_ =
      case mn of
       [] `Mod` " " -> Just ProTok_Term
        { protok_term   = \_meta -> ProTokApp
        , protok_fixity = Infix (Just AssocL) 9
        }
       _ -> Map.lookup mod ins >>= Map.lookup n
 return (pre, in_, post)
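-- | A minimal usage sketch ('lookup_prefix_not' and the name @not@
-- are hypothetical, not part of the library): query only the prefix
-- position for an unqualified name.
lookup_prefix_not
 :: Inj_Token meta ts (->)
 => Tokenizers meta ts
 -> Either Error_Term_Gram (Maybe (ProTok_Term Unifix meta ts))
lookup_prefix_not toks = do
 (pre, _in_, _post) <- protok_lookup ([] `Mod` TeName (Text.pack "not")) toks
 return pre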
-- | Apply a proto-token to another one.
--
-- This is required because polymorphic types are not implemented (yet),
-- therefore tokens of polymorphic terms must be given enough 'EToken's
-- to make them monomorphic.
protok_app
 :: Inj_Token meta ts (->)
 => ProTok meta ts
 -> ProTok meta ts
 -> Either Error_Term_Gram (ProTok meta ts)
protok_app (ProTokLam f) (ProTokTe a) = Right $ f a
protok_app (ProTokPi  f) (ProTokTy a) = Right $ f a
protok_app (ProTokTe  f) (ProTokTe a) =
 Right $ ProTokTe $
  inj_EToken (meta_of f) $ -- TODO: merge (meta_of a)
   f `Token_Term_App` a
protok_app ProTokApp f@ProTokLam{} = Right f
protok_app ProTokApp f@ProTokPi{}  = Right f
protok_app ProTokApp (ProTokTe f) =
 Right $ ProTokLam $ \a ->
  ProTokTe $
   inj_EToken (meta_of f) $ -- TODO: merge (meta_of a)
    f `Token_Term_App` a
protok_app ProTokLam{} _ = Left $ Error_Term_Gram_application_mismatch
protok_app ProTokPi{}  _ = Left $ Error_Term_Gram_application_mismatch
protok_app ProTokTe{}  _ = Left $ Error_Term_Gram_not_applicable
protok_app ProTokTy{}  _ = Left $ Error_Term_Gram_not_applicable
protok_app ProTokApp{} _ = Left $ Error_Term_Gram_application

-- ** Class 'TokenizeR'
class TokenizeR meta (ts::[*]) (rs::[*]) where
 tokenizeR :: Proxy rs -> Tokenizers meta ts
instance TokenizeR meta ts '[] where
 tokenizeR _rs = mempty
instance
 ( TokenizeT meta ts t
 , TokenizeR meta ts rs
 ) => TokenizeR meta ts (t ': rs) where
 tokenizeR _ =
  tokenizeR (Proxy @rs) <>
  tokenizeT (Proxy @t)

-- ** Class 'TokenizeT'
class TokenizeT meta ts t where
 tokenizeT :: Proxy t -> Tokenizers meta ts
 -- tokenizeT _t = [] `Mod` []
 tokenizeT _t = mempty

tokenizeTMod
 :: Mod_Path
 -> [(TeName, ProTok_Term fix meta ts)]
 -> Map Mod_Path (Map TeName (ProTok_Term fix meta ts))
tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl

tokenize0
 :: Inj_Token meta ts t
 => Text -> fixity
 -> TokenT meta ts (Proxy t)
 -> (TeName, ProTok_Term fixity meta ts)
tokenize0 n protok_fixity tok = (TeName n,) ProTok_Term
 { protok_term = \meta -> ProTokTe $ inj_EToken meta $ tok
 , protok_fixity
 }

tokenize1
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> TokenT meta ts (Proxy t))
 -> (TeName, ProTok_Term fixity meta ts)
tokenize1 n protok_fixity tok = (TeName n,) ProTok_Term
 { protok_term = \meta ->
    ProTokLam $ \a ->
     ProTokTe $ inj_EToken meta $ tok a
 , protok_fixity
 }

tokenize2
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
 -> (TeName, ProTok_Term fixity meta ts)
tokenize2 n protok_fixity tok = (TeName n,) ProTok_Term
 { protok_term = \meta ->
    ProTokLam $ \a ->
     ProTokLam $ \b ->
      ProTokTe $ inj_EToken meta $ tok a b
 , protok_fixity
 }

tokenize3
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
 -> (TeName, ProTok_Term fixity meta ts)
tokenize3 n protok_fixity tok = (TeName n,) ProTok_Term
 { protok_term = \meta ->
    ProTokLam $ \a ->
     ProTokLam $ \b ->
      ProTokLam $ \c ->
       ProTokTe $ inj_EToken meta $ tok a b c
 , protok_fixity
 }

-- * Type 'Mod'
type Mod_Path = [Mod_Name]
newtype Mod_Name = Mod_Name Text
 deriving (Eq, Ord, Show)
data Mod a = Mod Mod_Path a
 deriving (Eq, Functor, Ord, Show)
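-- | An illustrative sketch ('tokenizers_compose_sketch' is hypothetical,
-- not part of the library): how 'tokenize2' and 'tokenizeTMod' combine
-- to register 'Token_Term_Compose' as an infix operator named @.@
-- at the root module path.
tokenizers_compose_sketch
 :: Inj_Token meta ts (->)
 => Tokenizers meta ts
tokenizers_compose_sketch = mempty
 { tokenizers_infix = tokenizeTMod []
    [ tokenize2 (Text.pack ".") (Infix (Just AssocL) 9) Token_Term_Compose ]
 }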
-- * Class 'Gram_Name'
class
 ( Alt g
 , Alter g
 , App g
 , Try g
 , Gram_CF g
 , Gram_Op g
 , Gram_Lexer g
 , Gram_RegL g
 , Gram_Rule g
 , Gram_Terminal g
 ) => Gram_Name g where
 g_mod_path :: CF g Mod_Path
 g_mod_path = rule "mod_path" $
  infixrG
   (pure <$> g_mod_name)
   (op <$ char '.')
  where op = (<>)
 g_mod_name :: CF g Mod_Name
 g_mod_name = rule "mod_name" $
  (Mod_Name . Text.pack <$>) $
  (identG `minus`) $
  Fun.const
   <$> g_term_keywords
   <*. (any `but` g_term_idname_tail)
  where
  identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
  headG = unicat $ Unicat Char.UppercaseLetter
 g_term_mod_name :: CF g (Mod TeName)
 g_term_mod_name = rule "term_mod_name" $
  lexeme $
   g_term_mod_idname <+>
   parens g_term_mod_opname
 g_term_name :: CF g TeName
 g_term_name = rule "term_name" $
  lexeme $
   g_term_idname <+>
   parens g_term_opname
 g_term_mod_idname :: CF g (Mod TeName)
 g_term_mod_idname = rule "term_mod_idname" $
  Mod
   <$> option [] (try $ g_mod_path <* char '.')
   <*> g_term_idname
 g_term_idname :: CF g TeName
 g_term_idname = rule "term_idname" $
  (TeName . Text.pack <$>) $
  (identG `minus`) $
  Fun.const
   <$> g_term_keywords
   <*. (any `but` g_term_idname_tail)
  where
  identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
  headG = unicat Unicat_Letter
 g_term_idname_tail :: Terminal g Char
 g_term_idname_tail = rule "term_idname_tail" $
  unicat Unicat_Letter <+>
  unicat Unicat_Number
 g_term_keywords :: Reg rl g String
 g_term_keywords = rule "term_keywords" $
  choice $ string <$> ["in", "let"]
 g_term_mod_opname :: CF g (Mod TeName)
 g_term_mod_opname = rule "term_mod_opname" $
  Mod
   <$> option [] (try $ g_mod_path <* char '.')
   <*> g_term_opname
 g_term_opname :: CF g TeName
 g_term_opname = rule "term_opname" $
  (TeName . Text.pack <$>) $
  (symG `minus`) $
  Fun.const
   <$> g_term_keysyms
   <*. (any `but` g_term_opname_ok)
  where
  symG = some $ cf_of_Terminal g_term_opname_ok
 g_term_opname_ok :: Terminal g Char
 g_term_opname_ok = rule "term_opname_ok" $
  choice (unicat <$>
   [ Unicat_Symbol
   , Unicat_Punctuation
   , Unicat_Mark
   ]) `but` koG
  where
  koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
 g_term_keysyms :: Reg rl g String
 g_term_keysyms = rule "term_keysyms" $
  choice $ string <$> ["\\", "->", "=", "@"]
deriving instance Gram_Name g => Gram_Name (CF g)
instance Gram_Name EBNF
instance Gram_Name RuleDef

-- * Class 'Gram_Term_Type'
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta meta g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Name g
 , Gram_Type meta g
 ) => Gram_Term_Type meta g where
 g_term_abst_decl :: CF g (TeName, TokType meta)
 g_term_abst_decl = rule "term_abst_decl" $
  parens $
   (,)
    <$> g_term_name
    <*  symbol ":"
    <*> g_type
deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
instance Gram_Term_Type meta EBNF
instance Gram_Term_Type meta RuleDef

-- * Class 'Gram_Error'
class Gram_Error g where
 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
 term_unError (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
 term_unError (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Type 'Error_Term_Gram'
data Error_Term_Gram
 = Error_Term_Gram_Fixity Error_Fixity
 | Error_Term_Gram_Term_incomplete
 | Error_Term_Gram_Type_applied_to_nothing
 | Error_Term_Gram_not_applicable
 | Error_Term_Gram_application
 | Error_Term_Gram_application_mismatch
 deriving (Eq, Show)
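-- | A small composition sketch ('g_term_names_sketch' is a hypothetical
-- rule, not part of the library): 'Gram_Name' is enough to build larger
-- rules, here a comma-separated list of term names, accepting inputs
-- such as @map, (++), filter@.
g_term_names_sketch :: Gram_Name g => CF g [TeName]
g_term_names_sketch = rule "term_names" $
 (:)
  <$> g_term_name
  <*> many (symbol "," *> g_term_name)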
-- * Class 'Gram_Term'
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta meta g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Error g
 , Gram_Term_AtomsR meta ts ts g
 , Gram_Name g
 , Gram_Term_Type meta g
 , Gram_Type meta g
 , Inj_Token meta ts (->)
 ) => Gram_Term ts meta g where
 tokenizers_get :: CF g (Tokenizers meta ts -> a) -> CF g a
 tokenizers_put :: CF g (Tokenizers meta ts, a) -> CF g a
 g_term
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 g_term = rule "term" $ choice
  [ try g_term_abst
  , g_term_operators
  , g_term_let
  ]
 g_term_operators
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 g_term_operators = rule "term_operators" $
  term_unError $
   ((unProTok =<<) <$>) $
   term_unError $
    left Error_Term_Gram_Fixity <$> ops
  where
  ops :: CF g (Either Error_Fixity (Either Error_Term_Gram (ProTok meta ts)))
  ops =
   operators
    (Right <$> g_term_atom)
    (term_unError $ metaG $ tokenizers_get $ op_prefix  <$> g_term_op_prefix)
    (term_unError $ metaG $ tokenizers_get $ op_infix   <$> g_term_op_infix)
    (term_unError $ metaG $ tokenizers_get $ op_postfix <$> g_term_op_postfix)
  op_infix
   :: Mod TeName
   -> Tokenizers meta ts
   -> meta
   -> Either Error_Term_Gram
       ( Infix
       , Either Error_Term_Gram (ProTok meta ts)
       -> Either Error_Term_Gram (ProTok meta ts)
       -> Either Error_Term_Gram (ProTok meta ts)
       )
  op_infix name toks meta = do
   (_pre, in_, _post) <- protok_lookup name toks
   case in_ of
    Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
    Just p -> Right $ (protok_fixity p,) $ \ma mb -> do
     a <- ma
     b <- mb
     foldM protok_app (protok_term p meta) [a, b]
  op_prefix, op_postfix
   :: Mod TeName
   -> Tokenizers meta ts
   -> meta
   -> Either Error_Term_Gram
       ( Unifix
       , Either Error_Term_Gram (ProTok meta ts)
       -> Either Error_Term_Gram (ProTok meta ts)
       )
  op_prefix name toks meta = do
   (pre, _in_, _post) <- protok_lookup name toks
   case pre of
    Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
    Just p -> Right $ (protok_fixity p,) $ \ma -> do
     a <- ma
     protok_term p meta `protok_app` a
  op_postfix name toks meta = do
   (_pre, _in_, post) <- protok_lookup name toks
   case post of
    Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
    Just p -> Right $ (protok_fixity p,) $ \ma -> do
     a <- ma
     protok_term p meta `protok_app` a
 g_term_op_postfix :: CF g (Mod TeName)
 g_term_op_postfix = rule "term_op_postfix" $
  lexeme $
   g_backquote *> g_term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
   g_term_mod_opname
 g_term_op_infix :: CF g (Mod TeName)
 g_term_op_infix = rule "term_op_infix" $
  lexeme $
   between g_backquote g_backquote g_term_mod_idname <+>
   try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
   pure (Mod [] " ")
 g_term_op_prefix :: CF g (Mod TeName)
 g_term_op_prefix = rule "term_op_prefix" $
  lexeme $
   g_term_mod_idname <* g_backquote <+>
   g_term_mod_opname
 g_backquote :: Gram_Terminal g' => g' Char
 g_backquote = char '`'
 g_term_atom
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (ProTok meta ts)
 g_term_atom = rule "term_atom" $
  choice $
   (try (ProTokTy <$ char '@' <*> g_type) :) $
   try <$> gs_term_atomsR (Proxy @ts) <>
   [ try $
     metaG $ term_unError $ tokenizers_get $
      (\mn toks -> do
        (_, in_, _) <- protok_lookup mn toks
        case in_ of
         Nothing ->
          case mn of
           [] `Mod` n -> Right $ \meta ->
            ProTokTe $ inj_EToken meta $ Token_Term_Var n
           _ -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
         Just p -> Right $ protok_term p)
      <$> g_term_mod_name
   , ProTokTe <$> g_term_group
   ]
 g_term_group
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 g_term_group = rule "term_group" $ parens g_term
 g_term_abst
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 g_term_abst = rule "term_abst" $
  metaG $
   ((\(xs, te) meta ->
     foldr (\(x, ty_x) ->
      inj_EToken meta .
      Token_Term_Abst x ty_x) te xs) <$>) $
   g_term_abst_args_body
    (symbol "\\" *> some g_term_abst_decl <* symbol "->")
    g_term
 g_term_abst_args_body
  :: CF g [(TeName, TokType meta)]
  -> CF g (EToken meta ts)
  -> CF g ([(TeName, TokType meta)], EToken meta ts)
 -- g_term_abst_args_body args body = (,) <$> args <*> body
 g_term_abst_args_body cf_args cf_body =
  tokenizers_put $ tokenizers_get $
   (\a b (toks::Tokenizers meta ts) -> (toks, (a, b))) <$>
   (tokenizers_put $ tokenizers_get $
    (\args (toks::Tokenizers meta ts) ->
     (,args) Tokenizers
      { tokenizers_prefix  = del (tokenizers_prefix  toks) args
      , tokenizers_infix   = ins (tokenizers_infix   toks) args
      , tokenizers_postfix = del (tokenizers_postfix toks) args
      }) <$> cf_args) <*>
   cf_body
  where
  del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
  ins = foldr $ \(n, _) ->
   Map.insertWith (<>) [] $
    Map.singleton n ProTok_Term
     { protok_term   = \meta -> ProTokTe $ inj_EToken meta $ Token_Term_Var n
     , protok_fixity = infixN5
     }
 g_term_let
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 g_term_let = rule "term_let" $
  metaG $
   (\name args bound body meta ->
    inj_EToken meta $
    Token_Term_Let name
     (foldr (\(x, ty_x) ->
      inj_EToken meta .
      Token_Term_Abst x ty_x) bound args)
     body)
   <$  symbol "let"
   <*> g_term_name
   <*> many g_term_abst_decl
   <*  symbol "="
   <*> g_term
   <*  symbol "in"
   <*> g_term
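-- | A small composition sketch ('g_term_pair_sketch' is hypothetical,
-- not part of the library): any 'Gram_Term' grammar can be reused to
-- build larger rules, here a parenthesized pair of terms such as @(x, y)@.
g_term_pair_sketch
 :: ( Gram_Term ts meta g
    , Inj_Tokens meta ts '[Proxy (->)]
    )
 => CF g (EToken meta ts, EToken meta ts)
g_term_pair_sketch = parens $ (,) <$> g_term <* symbol "," <*> g_term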
deriving instance
 ( Gram_Term ts meta g
 , Gram_Term_AtomsR meta ts ts (CF g)
 ) => Gram_Term ts meta (CF g)
instance
 ( Gram_Term_AtomsR meta ts ts EBNF
 , Inj_Token meta ts (->)
 ) => Gram_Term ts meta EBNF where
 tokenizers_get (CF (EBNF g)) = CF $ EBNF g
 tokenizers_put (CF (EBNF g)) = CF $ EBNF g
instance
 ( Gram_Term_AtomsR meta ts ts RuleDef
 , Inj_Token meta ts (->)
 ) => Gram_Term ts meta RuleDef where
 tokenizers_get (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
 tokenizers_put (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Class 'Gram_Term_AtomsR'
class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
 gs_term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
instance Gram_Term_AtomsR meta ts '[] g where
 gs_term_atomsR _rs = []
instance
 ( Gram_Term_AtomsT meta ts t g
 , Gram_Term_AtomsR meta ts rs g
 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
 gs_term_atomsR _ =
  gs_term_atomsT (Proxy @t) <>
  gs_term_atomsR (Proxy @rs)

-- ** Class 'Gram_Term_AtomsT'
class Gram_Term_AtomsT meta ts t g where
 gs_term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
 gs_term_atomsT _t = []
 -- instance Gram_Term_AtomsT meta ts t RuleDef

gram_term
 :: forall g.
 ( Gram_Term '[Proxy (->), Proxy Integer] () g
 ) => [CF g ()]
gram_term =
 [ ue g_term
 , ue g_term_operators
 , up g_term_atom
 , ue g_term_group
 , ue g_term_abst
 , void (g_term_abst_decl::CF g (TeName, TokType ()))
 , ue g_term_let
 , void g_term_mod_name
 , void g_term_name
 , void g_term_idname
 , void $ cf_of_Terminal g_term_idname_tail
 , void $ cf_of_Reg g_term_keywords
 , void g_term_mod_opname
 , void g_term_opname
 , void $ cf_of_Terminal g_term_opname_ok
 , void $ cf_of_Reg g_term_keysyms
 ]
 where
 up :: CF g (ProTok () '[Proxy (->), Proxy Integer]) -> CF g ()
 up = (() <$)
 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
 ue = (() <$)
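-- A usage sketch (assuming 'Gram_Term_AtomsT' instances for
-- @Proxy (->)@ and @Proxy Integer@ are in scope, which are defined
-- in other modules): 'gram_term' can be instantiated at the 'EBNF'
-- interpreter to render the grammar of terms, e.g.:
--
-- @
-- ebnf_of_term :: [CF EBNF ()]
-- ebnf_of_term = gram_term
-- @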