{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- | Grammar of terms: tokenizing of term-level syntax
-- (identifiers, operators, abstraction, application, @let@).
--
-- NOTE(review): the original pragma list lacked several extensions
-- that the code below visibly uses (@data instance@ needs TypeFamilies;
-- @(ts::['*'])@ and promoted lists need DataKinds, KindSignatures and
-- TypeOperators; @Proxy \@ts@ needs TypeApplications and
-- ScopedTypeVariables; @(TeName n,)@ needs TupleSections; record puns
-- need NamedFieldPuns; @deriving instance@ needs StandaloneDeriving);
-- they are added here so the module compiles as written.
module Language.Symantic.Compiling.Term.Grammar where

import Control.Arrow (left)
import Control.Monad (foldM, void, (=<<))
import qualified Data.Char as Char
import qualified Data.Function as Fun
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import Data.Text (Text)
import qualified Data.Text as Text
import Prelude hiding (mod, not, any)

import Language.Symantic.Parsing
import Language.Symantic.Typing

-- * Type 'TeName'
-- | Name of a term: an identifier or an operator name.
newtype TeName = TeName Text
 deriving (Eq, Ord, Show)
instance IsString TeName where
 fromString = TeName . fromString

-- * Type 'ProTok'
-- | Proto 'EToken'. It's almost like a free monad,
-- but has a third constructor ('ProTokPi')
-- to require a type argument.
--
-- NOTE: this type may one day be removed
-- if proper type inferencing is done.
-- In the meantime it is used to require
-- term or type arguments needed to build
-- the 'EToken's of polymorphic terms.
data ProTok meta ts
 = ProTokLam (EToken meta ts -> ProTok meta ts)
   -- ^ Require a term argument.
 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
   -- ^ Require a type argument.
 | ProTok (EToken meta ts)
   -- ^ No need for any argument.

-- | Declared here and not in @Compiling.Lambda@
-- to be able to use 'Token_Term_Var' in 'protok'.
-- | 'TokenT' instance for the function arrow @(->)@:
-- the core term constructs (abstraction, application,
-- @let@, variable reference and composition).
data instance TokenT meta (ts::[*]) (Proxy (->))
 = Token_Term_Abst TeName (EToken meta '[Proxy Token_Type]) (EToken meta ts)
 | Token_Term_App (EToken meta ts) (EToken meta ts)
 | Token_Term_Let TeName (EToken meta ts) (EToken meta ts)
 | Token_Term_Var TeName
 | Token_Term_Compose (EToken meta ts) (EToken meta ts)

-- * Class 'Tokenize'
-- | Tokenizing over the whole token list @ts@ of the language.
type Tokenize meta ts = TokenizeR meta ts ts

-- ** Type 'Tokenizers'
-- | Prefix, infix and postfix tokenizers,
-- indexed by module path, then by term name.
data Tokenizers meta ts
 = Tokenizers
 { tokenizers_prefix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
 , tokenizers_infix :: Map Mod_Path (Map TeName (ProTok_Term Infix meta ts))
 , tokenizers_postfix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
 }
instance Semigroup (Tokenizers meta ts) where
 -- On a name clash the left operand wins,
 -- since 'Map.union' is left-biased.
 x <> y =
  Tokenizers
   (Map.unionWith Map.union (tokenizers_prefix x) (tokenizers_prefix y))
   (Map.unionWith Map.union (tokenizers_infix x) (tokenizers_infix y))
   (Map.unionWith Map.union (tokenizers_postfix x) (tokenizers_postfix y))
instance Monoid (Tokenizers meta ts) where
 mempty = Tokenizers Map.empty Map.empty Map.empty
 mappend = (<>)

-- ** Type 'ProTok_Term'
-- | A 'ProTok' builder (given the parsed @meta@),
-- paired with the fixity it is registered at.
data ProTok_Term fixy meta ts
 = ProTok_Term
 { protok_term :: meta -> ProTok meta ts
 , protok_fixity :: fixy
 }

-- | Gather the 'Tokenizers' of every token in @ts@.
tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
tokenizers = tokenizeR (Proxy @ts)

-- | Extract a completed 'EToken' from a 'ProTok',
-- failing if term or type arguments are still required.
unProTok
 :: ProTok meta ts
 -> Either Error_Term_Gram (EToken meta ts)
unProTok (ProTok t) = Right t
unProTok _ = Left Error_Term_Gram_Term_incomplete

-- | Lookup the prefix, infix and postfix tokenizers of a name.
--
-- An unqualified name with no registered infix tokenizer defaults
-- to a variable reference ('Token_Term_Var') at fixity 'infixN5';
-- an unknown qualified name is an error.
protok
 :: Inj_Token meta ts (->)
 => Mod TeName
 -> Tokenizers meta ts
 -> Either Error_Term_Gram
 ( Maybe (ProTok_Term Unifix meta ts)
 , ProTok_Term Infix meta ts
 , Maybe (ProTok_Term Unifix meta ts)
 )
protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
 let pre = Map.lookup mod pres >>= Map.lookup tn
 let post = Map.lookup mod posts >>= Map.lookup tn
 in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
 return (pre, in_, post)
 where
 var_or_err (Just x) = Right x
 var_or_err Nothing =
  case mod of
   [] -> Right $ var infixN5
   _ -> Left $ Error_Term_Gram_Undefined_term
 var protok_fixity =
  ProTok_Term
   { protok_term = \meta -> ProTok $ inj_EToken meta $ Token_Term_Var tn
   , protok_fixity }

-- | Apply a 'ProTok' to a list of type ('Left')
-- or term ('Right') arguments, one at a time.
protok_app
 :: Inj_Token meta ts (->)
 => ProTok meta ts
 -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
 -> Either Error_Term_Gram (ProTok meta ts)
protok_app = foldM app
 where
 app acc (Left typ) =
  case acc of
   ProTokPi g -> Right $ g typ
   _ -> Left Error_Term_Gram_Cannot_apply_type
 app acc (Right te) =
  case acc of
   ProTokLam f -> Right $ f te
   -- A saturated token applied to a further term argument
   -- becomes an explicit 'Token_Term_App'.
   ProTok tok@(EToken e) -> Right $ ProTok $
    inj_EToken (meta_of e) $
    Token_Term_App tok te
   _ -> Left Error_Term_Gram_Cannot_apply_term

-- ** Class 'TokenizeR'
-- | Recurse over the token list @rs@, accumulating 'Tokenizers'.
class TokenizeR meta (ts::[*]) (rs::[*]) where
 tokenizeR :: Proxy rs -> Tokenizers meta ts
instance TokenizeR meta ts '[] where
 tokenizeR _rs = mempty
instance
 ( TokenizeT meta ts t
 , TokenizeR meta ts rs
 ) => TokenizeR meta ts (t ': rs) where
 tokenizeR _ = tokenizeR (Proxy @rs) <> tokenizeT (Proxy @t)

-- ** Class 'TokenizeT'
-- | 'Tokenizers' contributed by a single token type @t@;
-- defaults to none.
class TokenizeT meta ts t where
 tokenizeT :: Proxy t -> Tokenizers meta ts
 -- tokenizeT _t = [] `Mod` []
 tokenizeT _t = mempty

-- | Build a tokenizer map from a module path
-- and an association list of names.
tokenizeTMod
 :: Mod_Path
 -> [(TeName, ProTok_Term fix meta ts)]
 -> Map Mod_Path (Map TeName (ProTok_Term fix meta ts))
tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl

-- | Tokenizer entry for a nullary token.
tokenize0
 :: Inj_Token meta ts t
 => Text -> fixity -> TokenT meta ts (Proxy t)
 -> (TeName, ProTok_Term fixity meta ts)
tokenize0 n protok_fixity tok =
 (TeName n,) ProTok_Term
  { protok_term = \meta -> ProTok $ inj_EToken meta $ tok
  , protok_fixity }

-- | Tokenizer entry for a token taking one term argument.
tokenize1
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> TokenT meta ts (Proxy t))
 -> (TeName, ProTok_Term fixity meta ts)
tokenize1 n protok_fixity tok =
 (TeName n,) ProTok_Term
  { protok_term = \meta ->
     ProTokLam $ \a ->
     ProTok $ inj_EToken meta $ tok a
  , protok_fixity }

-- | Tokenizer entry for a token taking two term arguments.
tokenize2
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
 -> (TeName, ProTok_Term fixity meta ts)
tokenize2 n protok_fixity tok =
 (TeName n,) ProTok_Term
  { protok_term = \meta ->
     ProTokLam $ \a -> ProTokLam $ \b ->
     ProTok $ inj_EToken meta $ tok a b
  , protok_fixity }

-- | Tokenizer entry for a token taking three term arguments.
tokenize3
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
 -> (TeName, ProTok_Term fixity meta ts)
tokenize3 n protok_fixity tok =
 (TeName n,) ProTok_Term
  { protok_term = \meta ->
     ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
     ProTok $ inj_EToken meta $ tok a b c
  , protok_fixity }

-- * Type 'Mod'
type Mod_Path = [Mod_Name]
-- | One segment of a module path.
newtype Mod_Name = Mod_Name Text
 deriving (Eq, Ord, Show)
-- | A value qualified by a module path.
data Mod a = Mod Mod_Path a
 deriving (Eq, Functor, Ord, Show)

-- * Class 'Gram_TeName'
-- | Grammar of module paths, term identifiers and operator names.
class
 ( Alt g
 , Alter g
 -- NOTE(review): 'Alter g' is listed twice in the original source;
 -- GHC accepts duplicate constraints, but one of them is redundant.
 , Alter g
 , App g
 , Try g
 , Gram_CF g
 , Gram_Op g
 , Gram_Lexer g
 , Gram_RegL g
 , Gram_Rule g
 , Gram_Terminal g
 ) => Gram_TeName g where
 mod_path :: CF g Mod_Path
 -- | Dot-separated module names.
 mod_path = rule "mod_path" $
  infixrG
   (pure <$> mod_name)
   (op <$ char '.')
  where op = (<>)
 mod_name :: CF g Mod_Name
 -- | An uppercase-initial identifier which is not a keyword.
 mod_name = rule "mod_name" $
  (Mod_Name . Text.pack <$>) $
  identG `minus`
  (Fun.const
   <$> term_keywords
   <*. (any `but` term_idname_tail))
  where
  identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
  headG = unicat $ Unicat Char.UppercaseLetter
 term_mod_name :: CF g (Mod TeName)
 -- | Possibly-qualified identifier, or operator between parentheses.
 term_mod_name = rule "term_mod_name" $
  lexeme $
   term_mod_idname <+>
   parens term_mod_opname
 term_name :: CF g TeName
 term_name = rule "term_name" $
  lexeme $
   term_idname <+>
   parens term_opname
 term_mod_idname :: CF g (Mod TeName)
 term_mod_idname = rule "term_mod_idname" $
  Mod
   <$> option [] (try $ mod_path <* char '.')
   <*> term_idname
 term_idname :: CF g TeName
 -- | A letter-initial identifier which is not a keyword.
 term_idname = rule "term_idname" $
  (TeName . Text.pack <$>) $
  (identG `minus`) $
  Fun.const
   <$> term_keywords
   <*. (any `but` term_idname_tail)
  where
  identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
  headG = unicat $ Unicat_Letter
 term_idname_tail :: Terminal g Char
 term_idname_tail = rule "term_idname_tail" $
  unicat Unicat_Letter <+>
  unicat Unicat_Number
 term_keywords :: Reg rl g String
 term_keywords = rule "term_keywords" $
  choice $ string <$> ["in", "let"]
 term_mod_opname :: CF g (Mod TeName)
 term_mod_opname = rule "term_mod_opname" $
  Mod
   <$> option [] (try $ mod_path <* char '.')
   <*> term_opname
 term_opname :: CF g TeName
 -- | A symbolic operator name which is not a keysym.
 term_opname = rule "term_opname" $
  (TeName . Text.pack <$>) $
  (symG `minus`) $
  Fun.const
   <$> term_keysyms
   <*. (any `but` term_opname_ok)
  where
  symG = some $ cf_of_Terminal term_opname_ok
 term_opname_ok :: Terminal g Char
 -- | Characters allowed in an operator name:
 -- symbols, punctuation and marks, minus the reserved ones.
 term_opname_ok = rule "term_opname_ok" $
  choice (unicat <$>
   [ Unicat_Symbol
   , Unicat_Punctuation
   , Unicat_Mark
   ]) `but` koG
  where
  koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
 term_keysyms :: Reg rl g String
 term_keysyms = rule "term_keysyms" $
  choice $ string <$> ["\\", "->", "="]
deriving instance Gram_TeName g => Gram_TeName (CF g)
instance Gram_TeName EBNF
instance Gram_TeName RuleDef

-- * Class 'Gram_Term_Type'
-- | Grammar of a term name annotated with its type,
-- as used in abstraction binders.
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta meta g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_TeName g
 , Gram_Type meta g
 ) => Gram_Term_Type meta g where
 term_abst_decl :: CF g (TeName, TokType meta)
 -- | @(name : type)@
 term_abst_decl = rule "term_abst_decl" $
  parens $ (,)
   <$> term_name
   <* symbol ":"
   <*> typeG
deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
instance Gram_Term_Type meta EBNF
instance Gram_Term_Type meta RuleDef

-- * Class 'Gram_Error'
-- | Lift 'Error_Term_Gram' failures into the grammar.
class Gram_Error g where
 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
 -- The EBNF interpretation only pretty-prints,
 -- so the error side is simply erased.
 term_unError (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
 term_unError (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Type 'Error_Term_Gram'
-- | Errors which can occur while tokenizing terms.
data Error_Term_Gram
 = Error_Term_Gram_Fixity Error_Fixity
 | Error_Term_Gram_Cannot_apply_term
 | Error_Term_Gram_Cannot_apply_type
 | Error_Term_Gram_Undefined_term
 | Error_Term_Gram_Term_incomplete
 deriving (Eq, Show)

-- * Class 'Gram_Term'
-- | Grammar of terms.
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta meta g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Error g
 , Gram_Term_AtomsR meta ts ts g
 , Gram_TeName g
 , Gram_Term_Type meta g
 , Gram_Type meta g
 ) => Gram_Term ts meta g where
 -- | Wrap 'term_abst'. Useful to modify body's scope.
 term_abst_args_body
  :: CF g [(TeName, TokType meta)]
  -> CF g (EToken meta ts)
  -> CF g ([(TeName, TokType meta)], EToken meta ts)
 term_abst_args_body args body = (,) <$> args <*> body
 -- | Give the parser access to the 'Tokenizers'.
 term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a
 termG :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts)
 termG = rule "term" $
  choice
   [ try term_abst
   , term_operators
   , term_let
   ]
 term_operators :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts)
 -- | Operator expressions, resolved according to fixity.
 term_operators = rule "term_operators" $
  term_unError $
  term_unError $
  left Error_Term_Gram_Fixity <$>
  operators
   (Right <$> term_app)
   (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix)
   (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix)
   (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix)
  where
  bqG :: Gram_Terminal g' => g' Char
  bqG = char '`'
  op_infix name toks meta = do
   (_pre, in_, _post) <- protok name toks
   return $ (protok_fixity in_,) $ \ma mb -> do
    a <- ma
    b <- mb
    unProTok =<< protok_term in_ meta `protok_app` [Right a, Right b]
  op_prefix name toks meta = do
   (pre, _in_, _post) <- protok name toks
   case pre of
    Just p -> Right $ (protok_fixity p,) $ (=<<) $ \a ->
     unProTok =<< protok_term p meta `protok_app` [Right a]
    Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
  op_postfix name toks meta = do
   (_pre, _in_, post) <- protok name toks
   case post of
    Just p -> Right $ (protok_fixity p,) $ (=<<) $ \a ->
     unProTok =<< protok_term p meta `protok_app` [Right a]
    Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
  -- NOTE(review): these rules live in this where-clause
  -- (and not at class-method level) so that 'bqG' is in scope.
  term_op_postfix :: CF g (Mod TeName)
  term_op_postfix = rule "term_op_postfix" $
   lexeme $
    bqG *> term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` bqG)
    term_mod_opname
  term_op_infix :: CF g (Mod TeName)
  term_op_infix = rule "term_op_infix" $
   lexeme $
    between bqG bqG term_mod_idname <+>
    term_mod_opname
  term_op_prefix :: CF g (Mod TeName)
  term_op_prefix = rule "term_op_prefix" $
   lexeme $
    term_mod_idname <* bqG <+>
    term_mod_opname
 term_app :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts)
 -- | Juxtaposition: apply a proto-token to the following atoms.
 term_app = rule "term_app" $
  term_unError $
  (\a as -> unProTok =<< protok_app a as)
   <$> term_atom_proto
   <*> many (try term_atom)
 term_atom
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (Either (EToken meta '[Proxy Token_Type]) (EToken meta ts))
 -- | Either a type argument (@\@type@) or a term atom.
 term_atom = rule "term_atom" $
  (Left <$ char '@' <*> typeG) <+>
  (Right <$> term_unError (unProTok <$> term_atom_proto))
 term_atom_proto :: Inj_Tokens meta ts '[Proxy (->)] => CF g (ProTok meta ts)
 -- | Atoms contributed by the tokens in @ts@,
 -- then named references, then parenthesized groups.
 term_atom_proto =
  choice $
   try <$> term_atomsR (Proxy @ts) <>
   [ try $
     metaG $
     ((\(_, in_, _) -> protok_term in_) <$>) $
     term_unError $
     term_tokenizers $
     protok <$> term_mod_name
   , ProTok <$> term_group
   ]
 term_group :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts)
 term_group = rule "term_group" $ parens termG
 term_abst :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts)
 -- | @\\(x : ty) ... -> body@,
 -- desugared to nested 'Token_Term_Abst'.
 term_abst = rule "term_abst" $
  metaG $
  ((\(xs, te) meta ->
   foldr (\(x, ty_x) ->
    inj_EToken meta .
    Token_Term_Abst x ty_x) te xs) <$>) $
  term_abst_args_body
   (symbol "\\" *> some term_abst_decl <* symbol "->")
   termG
 term_let :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts)
 -- | @let name args = bound in body@;
 -- the arguments are desugared to abstractions over the bound term.
 term_let = rule "term_let" $
  metaG $
  (\name args bound body meta ->
   inj_EToken meta $
   Token_Term_Let name
    (foldr (\(x, ty_x) ->
     inj_EToken meta .
     Token_Term_Abst x ty_x) bound args)
    body)
   <$ symbol "let"
   <*> term_name
   <*> many term_abst_decl
   <* symbol "="
   <*> termG
   <* symbol "in"
   <*> termG

deriving instance
 ( Gram_Term ts meta g
 , Gram_Term_AtomsR meta ts ts (CF g)
 ) => Gram_Term ts meta (CF g)
instance Gram_Term_AtomsR meta ts ts EBNF => Gram_Term ts meta EBNF where
 term_tokenizers (CF (EBNF g)) = CF $ EBNF g
instance Gram_Term_AtomsR meta ts ts RuleDef => Gram_Term ts meta RuleDef where
 term_tokenizers (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Class 'Gram_Term_AtomsR'
-- | Collect the atom grammars of every token in @rs@.
class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
 term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
instance Gram_Term_AtomsR meta ts '[] g where
 term_atomsR _rs = []
instance
 ( Gram_Term_AtomsT meta ts t g
 , Gram_Term_AtomsR meta ts rs g
 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
 term_atomsR _ =
  term_atomsT (Proxy @t) <>
  term_atomsR (Proxy @rs)

-- ** Class 'Gram_Term_AtomsT'
-- | Atom grammars contributed by a single token type @t@;
-- defaults to none.
class Gram_Term_AtomsT meta ts t g where
 term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
 term_atomsT _t = []
instance Gram_Term_AtomsT meta ts t RuleDef

-- | All the term grammar rules, at a monomorphic token list;
-- useful to render the grammar (e.g. as EBNF).
gram_term
 :: forall g.
 ( Gram_Term '[Proxy (->), Proxy Integer] () g
 ) => [CF g ()]
gram_term =
 [ ue termG
 , ue term_operators
 , ue term_app
 , ug term_atom
 , ue term_group
 , ue term_abst
 , void (term_abst_decl::CF g (TeName, TokType ()))
 , ue term_let
 , void term_mod_name
 , void term_name
 , void term_idname
 , void $ cf_of_Terminal term_idname_tail
 , void $ cf_of_Reg term_keywords
 , void term_mod_opname
 , void term_opname
 , void $ cf_of_Terminal term_opname_ok
 , void $ cf_of_Reg term_keysyms
 ]
 where
 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
 ue = (() <$)
 -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
 -- uf = (() <$)
 ug :: CF g (Either (EToken () '[Proxy Token_Type]) (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()
 ug = (() <$)