{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE ExistentialQuantification #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE PolyKinds #-} {-# LANGUAGE UndecidableInstances #-} module Language.Symantic.Compiling.Grammar where import Control.Arrow (left) import Control.Monad (void) import Data.Map.Strict (Map) import Data.Proxy (Proxy(..)) import Data.Semigroup (Semigroup(..)) import Data.String (IsString(..)) import Data.Text (Text) import Prelude hiding (mod, not, any) import qualified Data.Char as Char import qualified Data.Function as Fun import qualified Data.Map.Strict as Map import qualified Data.Text as Text import Language.Symantic.Grammar import Language.Symantic.Typing import Language.Symantic.Compiling.Term -- * Type 'Mod' data Mod a = Mod PathMod a deriving (Eq, Functor, Ord, Show) -- ** Type 'PathMod' type PathMod = [NameMod] -- ** Type 'NameMod' newtype NameMod = NameMod Text deriving (Eq, Ord, Show) -- ** Type 'NameTe' newtype NameTe = NameTe Text deriving (Eq, Ord, Show) instance IsString NameTe where fromString = NameTe . fromString -- * Type 'Modules' data Modules src ss = Modules { modules_prefix :: Map PathMod (Map NameTe (Tokenizer Unifix src ss)) , modules_infix :: Map PathMod (Map NameTe (Tokenizer Infix src ss)) , modules_postfix :: Map PathMod (Map NameTe (Tokenizer Unifix src ss)) } deriving instance ( Show (Tokenizer Unifix src ss) , Show (Tokenizer Infix src ss) ) => Show (Modules src ss) instance Semigroup (Modules src ss) where x <> y = Modules { modules_prefix = Map.unionWith (<>) (modules_prefix x) (modules_prefix y) , modules_infix = Map.unionWith (<>) (modules_infix x) (modules_infix y) , modules_postfix = Map.unionWith (<>) (modules_postfix x) (modules_postfix y) } instance Monoid (Modules src ss) where mempty = Modules Map.empty Map.empty Map.empty mappend = (<>) -- ** Type 'Tokenizer' data Tokenizer fixy src ss = Tokenizer { token_fixity :: fixy , token_term :: src -> Token_Term src ss } -- ** Type 'Token_Term' data Token_Term src ss = Token_Term (TermVT_CF src ss) | Token_Term_Abst src NameTe (AST_Type src) (AST_Term src ss) | Token_Term_Var src NameTe | Token_Term_Let src NameTe (AST_Term src ss) (AST_Term src ss) | Token_Term_App src deriving (Eq, Show) -- ** Type 'AST_Term' -- | /Abstract Syntax Tree/ of 'Token_Term'. type AST_Term src ss = BinTree (Token_Term src ss) -- * Class 'Inj_Modules' type Inj_Modules src ss = Inj_ModulesR src ss ss inj_modules :: forall src ss. Inj_Modules src ss => Modules src ss inj_modules = inj_modulesR (Proxy @ss) -- ** Class 'Inj_ModulesR' class Inj_ModulesR src (ss::[*]) (rs::[*]) where inj_modulesR :: Proxy rs -> Modules src ss instance Inj_ModulesR src ss '[] where inj_modulesR _rs = mempty instance ( Module src ss s , Inj_ModulesR src ss rs ) => Inj_ModulesR src ss (Proxy s ': rs) where inj_modulesR _ = inj_modulesR (Proxy @rs) <> module_ (Proxy @s) -- | Lookup the given 'Mod' 'NameTe' into the given 'Modules', -- returning for prefix, infix and postfix positions, when there is a match. modulesLookup :: forall src ss. Mod NameTe -> Modules src ss -> ( Maybe (Tokenizer Unifix src ss) , Maybe (Tokenizer Infix src ss) , Maybe (Tokenizer Unifix src ss) ) modulesLookup mn@(mod `Mod` n) (Modules pres ins posts) = do let pre = Map.lookup mod pres >>= Map.lookup n let post = Map.lookup mod posts >>= Map.lookup n let in_ = case mn of [] `Mod` " " -> Just Tokenizer { token_term = Token_Term_App @src @ss , token_fixity = Infix (Just AssocL) 9 } _ -> Map.lookup mod ins >>= Map.lookup n (pre, in_, post) -- * Class 'Module' class Module src ss s where module_ :: Proxy s -> Modules src ss module_ _t = mempty moduleWhere :: forall src ss. Source src => PathMod -> [DefTerm src ss] -> Modules src ss moduleWhere mod lst = Modules { modules_infix = mk $ \(n `WithFixity` fixy := t) -> case fixy of Fixity2 inf -> [(n, Tokenizer inf $ Token_Term . setSource (TermVT_CF t))] _ -> [] , modules_prefix = mk $ \(n `WithFixity` fixy := t) -> case fixy of Fixity1 pre@Prefix{} -> [(n, Tokenizer pre $ Token_Term . setSource (TermVT_CF t))] _ -> [] , modules_postfix = mk $ \(n `WithFixity` fixy := t) -> case fixy of Fixity1 post@Postfix{} -> [(n, Tokenizer post $ Token_Term . setSource (TermVT_CF t))] _ -> [] } where mk :: (DefTerm src ss -> [(NameTe, Tokenizer fixy src ss)]) -> Map PathMod (Map NameTe (Tokenizer fixy src ss)) mk = Map.singleton mod . Map.fromList . (`foldMap` lst) -- ** Type 'DefTerm' data DefTerm src ss = forall vs t. (:=) (WithFixity NameTe) (forall es. Term src ss es vs t) -- ** Type 'WithFixity' data WithFixity a = WithFixity a Fixity deriving (Eq, Show) instance IsString (WithFixity NameTe) where fromString a = WithFixity (fromString a) (Fixity2 infixN5) withInfix :: a -> Infix -> WithFixity a withInfix a inf = a `WithFixity` Fixity2 inf withInfixR :: a -> Precedence -> WithFixity a withInfixR a p = a `WithFixity` Fixity2 (infixR p) withInfixL :: a -> Precedence -> WithFixity a withInfixL a p = a `WithFixity` Fixity2 (infixL p) withInfixN :: a -> Precedence -> WithFixity a withInfixN a p = a `WithFixity` Fixity2 (infixN p) withInfixB :: a -> (Side, Precedence) -> WithFixity a withInfixB a (lr, p) = a `WithFixity` Fixity2 (infixB lr p) withPrefix :: a -> Precedence -> WithFixity a withPrefix a p = a `WithFixity` Fixity1 (Prefix p) withPostfix :: a -> Precedence -> WithFixity a withPostfix a p = a `WithFixity` Fixity1 (Postfix p) -- * Class 'Gram_Name' class ( Gram_Terminal g , Gram_Rule g , Gram_Alt g , Gram_Try g , Gram_App g , Gram_AltApp g , Gram_RegL g , Gram_CF g , Gram_Comment g , Gram_Op g ) => Gram_Name g where g_mod_path :: CF g PathMod g_mod_path = rule "mod_path" $ infixrG (pure <$> g_mod_name) (op <$ char '.') where op = (<>) g_mod_name :: CF g NameMod g_mod_name = rule "mod_name" $ (NameMod . Text.pack <$>) $ (identG `minus`) $ Fun.const <$> g_term_keywords <*. (any `but` g_term_idname_tail) where identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail) headG = unicat $ Unicat Char.UppercaseLetter g_term_mod_name :: CF g (Mod NameTe) g_term_mod_name = rule "term_mod_name" $ lexeme $ g_term_mod_idname <+> parens g_term_mod_opname g_term_name :: CF g NameTe g_term_name = rule "term_name" $ lexeme $ g_term_idname <+> parens g_term_opname g_term_mod_idname :: CF g (Mod NameTe) g_term_mod_idname = rule "term_mod_idname" $ Mod <$> option [] (try $ g_mod_path <* char '.') <*> g_term_idname g_term_idname :: CF g NameTe g_term_idname = rule "term_idname" $ (NameTe . Text.pack <$>) $ (identG `minus`) $ Fun.const <$> g_term_keywords <*. (any `but` g_term_idname_tail) where identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail) headG = unicat $ Unicat_Letter g_term_idname_tail :: Terminal g Char g_term_idname_tail = rule "term_idname_tail" $ unicat Unicat_Letter <+> unicat Unicat_Number g_term_keywords :: Reg rl g String g_term_keywords = rule "term_keywords" $ choice $ string <$> ["in", "let"] g_term_mod_opname :: CF g (Mod NameTe) g_term_mod_opname = rule "term_mod_opname" $ Mod <$> option [] (try $ g_mod_path <* char '.') <*> g_term_opname g_term_opname :: CF g NameTe g_term_opname = rule "term_opname" $ (NameTe . Text.pack <$>) $ (symG `minus`) $ Fun.const <$> g_term_keysyms <*. (any `but` g_term_opname_ok) where symG = some $ cf_of_Terminal g_term_opname_ok g_term_opname_ok :: Terminal g Char g_term_opname_ok = rule "term_opname_ok" $ choice (unicat <$> [ Unicat_Symbol , Unicat_Punctuation , Unicat_Mark ]) `but` koG where koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']']) g_term_keysyms :: Reg rl g String g_term_keysyms = rule "term_keysyms" $ choice $ string <$> ["\\", "->", "=", "@"] deriving instance Gram_Name g => Gram_Name (CF g) instance Gram_Name EBNF instance Gram_Name RuleEBNF -- * Class 'Gram_Term_Type' class ( Gram_Terminal g , Gram_Rule g , Gram_Meta src g , Gram_Alt g , Gram_AltApp g , Gram_App g , Gram_CF g , Gram_Comment g , Gram_Name g , Gram_Type src g ) => Gram_Term_Type src g where g_term_abst_decl :: CF g (NameTe, AST_Type src) g_term_abst_decl = rule "term_abst_decl" $ parens $ (,) <$> g_term_name <* (symbol "::" <+> symbol ":") -- NOTE: "::" is Haskell compatibility and ":" is another common notation. <*> g_type deriving instance Gram_Term_Type src g => Gram_Term_Type src (CF g) instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src EBNF instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src RuleEBNF -- ** Type 'Error_Term_Gram' data Error_Term_Gram = Error_Term_Gram_Fixity Error_Fixity | Error_Term_Gram_Fixity_Need FixityPos | Error_Term_Gram_Term_incomplete | Error_Term_Gram_Type_applied_to_nothing | Error_Term_Gram_not_applicable | Error_Term_Gram_application | Error_Term_Gram_application_mismatch deriving (Eq, Show) -- *** Type 'FixityPos' data FixityPos = FixityPos_Prefix | FixityPos_Infix | FixityPos_Postfix deriving (Eq, Show) -- * Class 'Gram_Term' class ( Gram_Meta src g , Gram_Error Error_Term_Gram g , Gram_Terminal g , Gram_Rule g , Gram_Alt g , Gram_App g , Gram_AltApp g , Gram_CF g , Gram_Comment g , Gram_Type src g , Gram_Name g , Gram_Term_Type src g , Gram_Term_Atoms src ss g , Show src ) => Gram_Term src ss g where modules_get :: CF g (Modules src ss -> a) -> CF g a modules_put :: CF g (Modules src ss, a) -> CF g a g_term :: CF g (AST_Term src ss) g_term = rule "term" $ choice [ try g_term_abst , g_term_operators , g_term_let ] g_term_operators :: CF g (AST_Term src ss) g_term_operators = rule "term_operators" $ catch $ left Error_Term_Gram_Fixity <$> g_ops where g_ops :: CF g (Either Error_Fixity (AST_Term src ss)) g_ops = operators g_term_atom g_prefix g_infix g_postfix g_prefix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss) g_infix :: CF g (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss) g_postfix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss) g_prefix = catch $ withMeta $ modules_get $ op_prefix <$> g_prefix_op g_infix = catch $ withMeta $ modules_get $ op_infix <$> g_infix_op g_postfix = catch $ withMeta $ modules_get $ op_postfix <$> g_postfix_op op_infix :: Mod NameTe -> Modules src ss -> src -> Either Error_Term_Gram (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss) op_infix name toks src = do let (_pre, in_, _post) = modulesLookup name toks case in_ of Nothing -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Infix Just p -> Right $ (token_fixity p,) $ \a b -> (BinTree0 (token_term p src) `BinTree2` a) `BinTree2` b op_prefix, op_postfix :: Mod NameTe -> Modules src ss -> src -> Either Error_Term_Gram ( Unifix , AST_Term src ss -> AST_Term src ss ) op_prefix name toks src = do let (pre, _in_, _post) = modulesLookup name toks case pre of Nothing -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Prefix Just p -> Right $ (token_fixity p,) $ \a -> BinTree0 (token_term p src) `BinTree2` a op_postfix name toks src = do let (_pre, _in_, post) = modulesLookup name toks case post of Nothing -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Postfix Just p -> Right $ (token_fixity p,) $ \a -> BinTree0 (token_term p src) `BinTree2` a g_postfix_op :: CF g (Mod NameTe) g_postfix_op = rule "term_op_postfix" $ lexeme $ g_backquote *> g_term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote) g_term_mod_opname g_infix_op :: CF g (Mod NameTe) g_infix_op = rule "term_op_infix" $ lexeme $ between g_backquote g_backquote g_term_mod_idname <+> try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+> pure (Mod [] " ") g_prefix_op :: CF g (Mod NameTe) g_prefix_op = rule "term_op_prefix" $ lexeme $ g_term_mod_idname <* g_backquote <+> g_term_mod_opname g_backquote :: Gram_Terminal g' => g' Char g_backquote = char '`' g_term_atom :: CF g (AST_Term src ss) g_term_atom = rule "term_atom" $ choice $ {-(try ( withMeta $ (\typ src -> BinTree0 $ inj_EToken src $ Token_Term_Type typ) <$ char '@' <*> g_type) :) $ -} (try <$> g_term_atomsR (Proxy @ss)) <> [ try $ catch $ withMeta $ modules_get $ (\mn toks src -> do let (_, in_, _) = modulesLookup mn toks case in_ of Just p -> Right $ BinTree0 $ token_term p src Nothing -> case mn of [] `Mod` n -> Right $ BinTree0 $ Token_Term_Var src n _ -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Infix ) <$> g_term_mod_name , g_term_group ] g_term_group :: CF g (AST_Term src ss) g_term_group = rule "term_group" $ parens g_term g_term_abst :: CF g (AST_Term src ss) g_term_abst = rule "term_abst" $ withMeta $ ((\(xs, te) src -> foldr (\(x, ty_x) -> BinTree0 . Token_Term_Abst src x ty_x) te xs) <$>) $ g_term_abst_args_body (symbol "\\" *> some g_term_abst_decl <* symbol "->") g_term g_term_abst_args_body :: CF g [(NameTe, AST_Type src)] -> CF g (AST_Term src ss) -> CF g ([(NameTe, AST_Type src)], AST_Term src ss) -- g_term_abst_args_body args body = (,) <$> args <*> body g_term_abst_args_body cf_args cf_body = modules_put $ modules_get $ (\a b (toks::Modules src ss) -> (toks, (a, b))) <$> (modules_put $ modules_get $ (\args (toks::Modules src ss) -> (,args) Modules { modules_prefix = del (modules_prefix toks) args , modules_infix = ins (modules_infix toks) args , modules_postfix = del (modules_postfix toks) args }) <$> cf_args) <*> cf_body where del = foldr $ \(n, _) -> Map.adjust (Map.delete n) [] ins = foldr $ \(n, _) -> Map.insertWith (<>) [] $ Map.singleton n Tokenizer { token_term = \src -> Token_Term_Var src n , token_fixity = infixN5 } g_term_let :: CF g (AST_Term src ss) g_term_let = rule "term_let" $ withMeta $ (\name args bound body src -> BinTree0 $ Token_Term_Let src name (foldr (\(x, ty_x) -> BinTree0 . Token_Term_Abst src x ty_x) bound args) body) <$ symbol "let" <*> g_term_name <*> many g_term_abst_decl <* symbol "=" <*> g_term <* symbol "in" <*> g_term deriving instance ( Gram_Term src ss g , Gram_Term_Atoms src ss (CF g) , Show src ) => Gram_Term src ss (CF g) instance ( Gram_Term_Atoms src ss EBNF , Inj_Source (Text_of_Source src) src , Show src ) => Gram_Term src ss EBNF where modules_get (CF (EBNF g)) = CF $ EBNF g modules_put (CF (EBNF g)) = CF $ EBNF g instance ( Gram_Term_Atoms src ss RuleEBNF , Inj_Source (Text_of_Source src) src , Show src ) => Gram_Term src ss RuleEBNF where modules_get (CF (RuleEBNF (EBNF g))) = CF $ RuleEBNF $ EBNF g modules_put (CF (RuleEBNF (EBNF g))) = CF $ RuleEBNF $ EBNF g -- ** Class 'Gram_Term_Atoms' type Gram_Term_Atoms src ss g = Gram_Term_AtomsR src ss ss g -- *** Class 'Gram_Term_AtomsR' class Gram_Term_AtomsR src (ss::[*]) (rs::[*]) g where g_term_atomsR :: Proxy rs -> [CF g (AST_Term src ss)] instance Gram_Term_AtomsR src ss '[] g where g_term_atomsR _rs = [] instance ( Gram_Term_AtomsFor src ss g t , Gram_Term_AtomsR src ss rs g ) => Gram_Term_AtomsR src ss (Proxy t ': rs) g where g_term_atomsR _ = g_term_atomsFor (Proxy @t) <> g_term_atomsR (Proxy @rs) -- *** Class 'Gram_Term_AtomsFor' class Gram_Term_AtomsFor src ss g t where g_term_atomsFor :: Proxy t -> [CF g (AST_Term src ss)] g_term_atomsFor _t = [] gram_term :: forall g. ( Gram_Term () '[Proxy (->), Proxy Integer] g ) => [CF g ()] gram_term = [ voiD g_term , voiD g_term_operators , voiD g_term_atom , voiD g_term_group , voiD g_term_abst , void (g_term_abst_decl::CF g (NameTe, AST_Type ())) , voiD g_term_let , void g_term_mod_name , void g_term_name , void g_term_idname , void $ cf_of_Terminal g_term_idname_tail , void $ cf_of_Reg g_term_keywords , void g_term_mod_opname , void g_term_opname , void $ cf_of_Terminal g_term_opname_ok , void $ cf_of_Reg g_term_keysyms ] where voiD :: CF g (AST_Term () '[Proxy (->), Proxy Integer]) -> CF g () voiD = (() <$)