{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE PolyKinds #-} {-# LANGUAGE UndecidableInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Language.Symantic.Compiling.Term.Grammar where import Control.Arrow (left) import Control.Monad (foldM, void, (=<<)) import qualified Data.Char as Char import qualified Data.Function as Fun import qualified Data.List as List import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Monoid ((<>)) import Data.Proxy (Proxy(..)) import Data.Text (Text) import qualified Data.Text as Text import Prelude hiding (mod, not, any) import Language.Symantic.Parsing import Language.Symantic.Typing -- * Type 'Term_Name' newtype Term_Name = Term_Name Text deriving (Eq, Ord, Show) -- * Type 'ProTok' -- | Proto 'EToken'. It's almost like a free monad, -- but has a third constructor ('ProTokPi') -- to require a type argument. -- -- NOTE: this type may one day be removed -- if proper type inferencing is done. -- In the meantime it is used to require -- term or type arguments needed to build -- the 'EToken's of polymorphic terms. data ProTok meta ts = ProTokLam (EToken meta ts -> ProTok meta ts) -- ^ Require a term argument. | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts) -- ^ Require a type argument. | ProTok (EToken meta ts) -- ^ No need for any argument. -- | Declared here and not in @Compiling.Lambda@ -- to be able to use 'Token_Term_Var' in 'protok'. data instance TokenT meta (ts::[*]) (Proxy (->)) = Token_Term_Abst Term_Name (EToken meta '[Proxy Token_Type]) (EToken meta ts) | Token_Term_App (EToken meta ts) (EToken meta ts) | Token_Term_Let Term_Name (EToken meta ts) (EToken meta ts) | Token_Term_Var Term_Name | Token_Term_Compose (EToken meta ts) (EToken meta ts) -- * Class 'Tokenize' type Tokenize meta ts = TokenizeR meta ts ts -- ** Type 'Tokenizers' data Tokenizers meta ts = Tokenizers { tokenizers_prefix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts)) , tokenizers_infix :: Map Mod_Path (Map Term_Name (Term_ProTok Infix meta ts)) , tokenizers_postfix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts)) } instance Monoid (Tokenizers meta ts) where mempty = Tokenizers Map.empty Map.empty Map.empty mappend x y = Tokenizers (Map.unionWith Map.union (tokenizers_prefix x) (tokenizers_prefix y)) (Map.unionWith Map.union (tokenizers_infix x) (tokenizers_infix y)) (Map.unionWith Map.union (tokenizers_postfix x) (tokenizers_postfix y)) data Term_ProTok fixy meta ts = Term_ProTok { term_protok :: meta -> ProTok meta ts , term_fixity :: fixy } tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts tokenizers = tokenizeR (Proxy @ts) unProTok :: ProTok meta ts -> Either Error_Term_Gram (EToken meta ts) unProTok (ProTok t) = Right t unProTok _ = Left Error_Term_Gram_Term_incomplete protok :: Inj_Token meta ts (->) => Mod Term_Name -> Tokenizers meta ts -> Either Error_Term_Gram ( Maybe (Term_ProTok Unifix meta ts) , Term_ProTok Infix meta ts , Maybe (Term_ProTok Unifix meta ts) ) protok (mod `Mod` tn) (Tokenizers pres ins posts) = do let pre = Map.lookup mod pres >>= Map.lookup tn let post = Map.lookup mod posts >>= Map.lookup tn in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn return (pre, in_, post) where var_or_err (Just x) = Right x var_or_err Nothing = case mod of [] -> Right (var infixN5) _ -> Left $ Error_Term_Gram_Undefined_term var term_fixity = Term_ProTok { term_protok = \meta -> ProTok $ inj_EToken meta $ Token_Term_Var tn , term_fixity } protok_app :: Inj_Token meta ts (->) => ProTok meta ts -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)] -> Either Error_Term_Gram (ProTok meta ts) protok_app = foldM app where app acc (Left typ) = case acc of ProTokPi g -> Right $ g typ _ -> Left Error_Term_Gram_Cannot_apply_type app acc (Right te) = case acc of ProTokLam f -> Right $ f te ProTok tok@(EToken e) -> Right $ ProTok $ inj_EToken (meta_of e) $ Token_Term_App tok te _ -> Left Error_Term_Gram_Cannot_apply_term -- ** Class 'TokenizeR' class TokenizeR meta (ts::[*]) (rs::[*]) where tokenizeR :: Proxy rs -> Tokenizers meta ts instance TokenizeR meta ts '[] where tokenizeR _rs = mempty instance ( TokenizeT meta ts t , TokenizeR meta ts rs ) => TokenizeR meta ts (t ': rs) where tokenizeR _ = tokenizeR (Proxy @rs) `mappend` tokenizeT (Proxy @t) -- ** Class 'TokenizeT' class TokenizeT meta ts t where tokenizeT :: Proxy t -> Tokenizers meta ts -- tokenizeT _t = [] `Mod` [] tokenizeT _t = mempty tokenizeTMod :: Mod_Path -> [(Term_Name, Term_ProTok fix meta ts)] -> Map Mod_Path (Map Term_Name (Term_ProTok fix meta ts)) tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl tokenize0 :: Inj_Token meta ts t => Text -> fixity -> TokenT meta ts (Proxy t) -> (Term_Name, Term_ProTok fixity meta ts) tokenize0 n term_fixity tok = (Term_Name n,) Term_ProTok { term_protok = \meta -> ProTok $ inj_EToken meta $ tok , term_fixity } tokenize1 :: Inj_Token meta ts t => Text -> fixity -> (EToken meta ts -> TokenT meta ts (Proxy t)) -> (Term_Name, Term_ProTok fixity meta ts) tokenize1 n term_fixity tok = (Term_Name n,) Term_ProTok { term_protok = \meta -> ProTokLam $ \a -> ProTok $ inj_EToken meta $ tok a , term_fixity } tokenize2 :: Inj_Token meta ts t => Text -> fixity -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t)) -> (Term_Name, Term_ProTok fixity meta ts) tokenize2 n term_fixity tok = (Term_Name n,) Term_ProTok { term_protok = \meta -> ProTokLam $ \a -> ProTokLam $ \b -> ProTok $ inj_EToken meta $ tok a b , term_fixity } tokenize3 :: Inj_Token meta ts t => Text -> fixity -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t)) -> (Term_Name, Term_ProTok fixity meta ts) tokenize3 n term_fixity tok = (Term_Name n,) Term_ProTok { term_protok = \meta -> ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c -> ProTok $ inj_EToken meta $ tok a b c , term_fixity } -- * Type 'Mod' type Mod_Path = [Mod_Name] newtype Mod_Name = Mod_Name Text deriving (Eq, Ord, Show) data Mod a = Mod Mod_Path a deriving (Eq, Functor, Ord, Show) -- * Class 'Gram_Term_Name' class ( Alt g , Alter g , Alter g , App g , Try g , Gram_CF g , Gram_Op g , Gram_Lexer g , Gram_RegL g , Gram_Rule g , Gram_Terminal g ) => Gram_Term_Name g where mod_path :: CF g Mod_Path mod_path = rule "mod_path" $ infixrG (pure <$> mod_name) (op <$ char '.') where op = mappend mod_name :: CF g Mod_Name mod_name = rule "mod_name" $ (Mod_Name . Text.pack <$>) $ identG `minus` (Fun.const <$> term_keywords <*. (any `but` term_idname_tail)) where identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail) headG = unicat $ Unicat Char.UppercaseLetter term_mod_name :: CF g (Mod Term_Name) term_mod_name = rule "term_mod_name" $ lexeme $ term_mod_idname <+> parens term_mod_opname term_name :: CF g Term_Name term_name = rule "term_name" $ lexeme $ term_idname <+> parens term_opname term_mod_idname :: CF g (Mod Term_Name) term_mod_idname = rule "term_mod_idname" $ Mod <$> option [] (try $ mod_path <* char '.') <*> term_idname term_idname :: CF g Term_Name term_idname = rule "term_idname" $ (Term_Name . Text.pack <$>) $ (identG `minus`) $ Fun.const <$> term_keywords <*. (any `but` term_idname_tail) where identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail) headG = unicat $ Unicat_Letter term_idname_tail :: Terminal g Char term_idname_tail = rule "term_idname_tail" $ unicat Unicat_Letter <+> unicat Unicat_Number term_keywords :: Reg rl g String term_keywords = rule "term_keywords" $ choice $ string <$> ["in", "let"] term_mod_opname :: CF g (Mod Term_Name) term_mod_opname = rule "term_mod_opname" $ Mod <$> option [] (try $ mod_path <* char '.') <*> term_opname term_opname :: CF g Term_Name term_opname = rule "term_opname" $ (Term_Name . Text.pack <$>) $ (symG `minus`) $ Fun.const <$> term_keysyms <*. (any `but` term_opname_ok) where symG = some $ cf_of_Terminal $ term_opname_ok term_opname_ok :: Terminal g Char term_opname_ok = rule "term_opname_ok" $ choice (unicat <$> [ Unicat_Symbol , Unicat_Punctuation , Unicat_Mark ]) `but` koG where koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']']) term_keysyms :: Reg rl g String term_keysyms = rule "term_keysyms" $ choice $ string <$> ["\\", "->", "="] deriving instance Gram_Term_Name g => Gram_Term_Name (CF g) instance Gram_Term_Name EBNF instance Gram_Term_Name RuleDef -- * Class 'Gram_Term_Type' class ( Alt g , Alter g , App g , Gram_CF g , Gram_Lexer g , Gram_Meta meta g , Gram_Rule g , Gram_Terminal g , Gram_Term_Name g , Gram_Type meta g ) => Gram_Term_Type meta g where term_abst_decl :: CF g (Term_Name, TokType meta) term_abst_decl = rule "term_abst_decl" $ parens $ (,) <$> term_name <* symbol ":" <*> typeG deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g) instance Gram_Term_Type meta EBNF instance Gram_Term_Type meta RuleDef -- * Class 'Gram_Error' class Gram_Error g where term_unError :: CF g (Either Error_Term_Gram a) -> CF g a deriving instance Gram_Error g => Gram_Error (CF g) instance Gram_Error EBNF where term_unError (CF (EBNF g)) = CF $ EBNF g instance Gram_Error RuleDef where term_unError (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF $ g -- ** Type 'Error_Term_Gram' data Error_Term_Gram = Error_Term_Gram_Fixity Error_Fixity | Error_Term_Gram_Cannot_apply_term | Error_Term_Gram_Cannot_apply_type | Error_Term_Gram_Undefined_term | Error_Term_Gram_Term_incomplete deriving (Eq, Show) -- * Class 'Gram_Term' class ( Alt g , Alter g , App g , Gram_CF g , Gram_Lexer g , Gram_Meta meta g , Gram_Rule g , Gram_Terminal g , Gram_Error g , Gram_Term_AtomsR meta ts ts g , Gram_Term_Name g , Gram_Term_Type meta g , Gram_Type meta g ) => Gram_Term ts meta g where -- | Wrap 'term_abst'. Useful to modify body's scope. term_abst_args_body :: CF g [(Term_Name, TokType meta)] -> CF g (EToken meta ts) -> CF g ([(Term_Name, TokType meta)], EToken meta ts) term_abst_args_body args body = (,) <$> args <*> body term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a termG :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts) termG = rule "term" $ choice [ try term_abst , term_operators , term_let ] term_operators :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts) term_operators = rule "term_operators" $ term_unError $ term_unError $ left Error_Term_Gram_Fixity <$> operators (Right <$> term_app) (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix) (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix) (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix) where bqG :: Gram_Terminal g' => g' Char bqG = char '`' op_infix name toks meta = do (_pre, in_, _post) <- protok name toks return $ (term_fixity in_,) $ \ma mb -> do a <- ma b <- mb unProTok =<< term_protok in_ meta `protok_app` [Right a, Right b] op_prefix name toks meta = do (pre, _in_, _post) <- protok name toks case pre of Just p -> Right $ (term_fixity p,) $ (=<<) $ \a -> unProTok =<< term_protok p meta `protok_app` [Right a] Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix op_postfix name toks meta = do (_pre, _in_, post) <- protok name toks case post of Just p -> Right $ (term_fixity p,) $ (=<<) $ \a -> unProTok =<< term_protok p meta `protok_app` [Right a] Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix term_op_postfix :: CF g (Mod Term_Name) term_op_postfix = rule "term_op_postfix" $ lexeme $ bqG *> term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` bqG) term_mod_opname term_op_infix :: CF g (Mod Term_Name) term_op_infix = rule "term_op_infix" $ lexeme $ between bqG bqG term_mod_idname <+> term_mod_opname term_op_prefix :: CF g (Mod Term_Name) term_op_prefix = rule "term_op_prefix" $ lexeme $ term_mod_idname <* bqG <+> term_mod_opname term_app :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts) term_app = rule "term_app" $ term_unError $ (\a as -> unProTok =<< protok_app a as) <$> term_atom_proto <*> many (try term_atom) term_atom :: Inj_Tokens meta ts '[Proxy (->)] => CF g (Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)) term_atom = rule "term_atom" $ (Left <$ char '@' <*> typeG) <+> (Right <$> term_unError (unProTok <$> term_atom_proto)) term_atom_proto :: Inj_Tokens meta ts '[Proxy (->)] => CF g (ProTok meta ts) term_atom_proto = choice $ try <$> term_atomsR (Proxy @ts) <> [ try $ metaG $ ((\(_, in_, _) -> term_protok in_) <$>) $ term_unError $ term_tokenizers $ protok <$> term_mod_name , ProTok <$> term_group ] term_group :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts) term_group = rule "term_group" $ parens termG term_abst :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts) term_abst = rule "term_abst" $ metaG $ ((\(xs, te) meta -> List.foldr (\(x, ty_x) -> inj_EToken meta . Token_Term_Abst x ty_x) te xs) <$>) $ term_abst_args_body (symbol "\\" *> some term_abst_decl <* symbol "->") termG term_let :: Inj_Tokens meta ts '[Proxy (->)] => CF g (EToken meta ts) term_let = rule "term_let" $ metaG $ (\name args bound body meta -> inj_EToken meta $ Token_Term_Let name (List.foldr (\(x, ty_x) -> inj_EToken meta . Token_Term_Abst x ty_x) bound args ) body) <$ symbol "let" <*> term_name <*> many term_abst_decl <* symbol "=" <*> termG <* symbol "in" <*> termG deriving instance ( Gram_Term ts meta g , Gram_Term_AtomsR meta ts ts (CF g) ) => Gram_Term ts meta (CF g) instance Gram_Term_AtomsR meta ts ts EBNF => Gram_Term ts meta EBNF where term_tokenizers (CF (EBNF g)) = CF $ EBNF g instance Gram_Term_AtomsR meta ts ts RuleDef => Gram_Term ts meta RuleDef where term_tokenizers (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF $ g -- ** Class 'Gram_Term_AtomsR' class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)] instance Gram_Term_AtomsR meta ts '[] g where term_atomsR _rs = [] instance ( Gram_Term_AtomsT meta ts t g , Gram_Term_AtomsR meta ts rs g ) => Gram_Term_AtomsR meta ts (t ': rs) g where term_atomsR _ = term_atomsT (Proxy @t) <> term_atomsR (Proxy @rs) -- ** Class 'Gram_Term_AtomsT' class Gram_Term_AtomsT meta ts t g where term_atomsT :: Proxy t -> [CF g (ProTok meta ts)] term_atomsT _t = [] instance Gram_Term_AtomsT meta ts t RuleDef gram_term :: forall g. ( Gram_Term '[Proxy (->), Proxy Integer] () g ) => [CF g ()] gram_term = [ ue termG , ue term_operators , ue term_app , ug term_atom , ue term_group , ue term_abst , void (term_abst_decl::CF g (Term_Name, TokType ())) , ue term_let , void term_mod_name , void term_name , void term_idname , void $ cf_of_Terminal term_idname_tail , void $ cf_of_Reg term_keywords , void term_mod_opname , void term_opname , void $ cf_of_Terminal term_opname_ok , void $ cf_of_Reg term_keysyms ] where ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g () ue = (() <$) -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g () -- uf = (() <$) ug :: CF g (Either (EToken () '[Proxy Token_Type]) (EToken () '[Proxy (->), Proxy Integer])) -> CF g () ug = (() <$)