{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.Symantic.Compiling.Term.Grammar where
import Control.Arrow (left)
import Control.Monad (void)
import Data.Map.Strict (Map)
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import Data.Text (Text)
import Prelude hiding (mod, not, any)
import qualified Data.Char as Char
import qualified Data.Function as Fun
import qualified Data.Map.Strict as Map
import qualified Data.Text as Text
import Language.Symantic.Parsing
import Language.Symantic.Typing
-- * Type 'TeName'
newtype TeName = TeName Text
 deriving (Eq, Ord, Show)
instance IsString TeName where
  fromString = TeName . fromString
-- * Type 'Mod'
type Mod_Path = [Mod_Name]
newtype Mod_Name = Mod_Name Text
 deriving (Eq, Ord, Show)
data Mod a = Mod Mod_Path a
 deriving (Eq, Functor, Ord, Show)
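-- For instance, the qualified name @Foo.Bar.f@ is represented as
-- @'Mod' ['Mod_Name' "Foo", 'Mod_Name' "Bar"] ('TeName' "f")@,
-- and an unqualified @f@ as @'Mod' [] ('TeName' "f")@.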
-- * Type 'AST_Term'
-- type AST_Term src ts = BinTree (EToken src ts)
-- NOTE: Declared here rather than in @Lib.Lambda@ to be able to use them here.
data instance TokenT src (ts::[*]) (Proxy (->))
 = Token_Term_Abst TeName (AST_Type src) (AST_Term src ts)
 | Token_Term_App (EToken src ts) (EToken src ts)
 | Token_Term_Let TeName (AST_Term src ts) (AST_Term src ts)
 | Token_Term_Var TeName
 | Token_Term_Type (AST_Type src)
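-- For instance, @let x = b in t@ is tokenized as
-- @'Token_Term_Let' "x" b t@, and a variable occurrence @x@ as
-- @'Token_Term_Var' "x"@; 'g_term_let' and 'g_term_atom' below produce them.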
(~>) :: Inj_TyConst cs (->) => Type cs a -> Type cs b -> Type cs (a -> b)
(~>) a b = ty @(->) :$ a :$ b
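-- For instance, @ty \@Bool ~> ty \@Bool@ builds the 'Type' of
-- @Bool -> Bool@ (assuming the @Bool@ constant is injectable into @cs@,
-- i.e. @'Inj_TyConst' cs Bool@ holds).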
-- (.) :: (b -> c) -> (a -> b) -> a -> c
ty_compose :: Inj_TyConsts cs [Proxy (->), Proxy Monad] => Type cs (Any::Void)
    (b ~> c) ~> (a ~> b) ~> a ~> c

-- ($) :: (a -> b) -> a -> b
ty_app :: Inj_TyConsts cs [Proxy (->), Proxy Monad] => Type cs (Any::Void)
-- ** Type 'Tokenizer'
data Tokenizer fixy src ts
 = Tokenizer
 { tokenizer :: src -> EToken src ts
 , tokenizer_fixity :: fixy
 }
-- ** Type 'Tokenizers'
data Tokenizers src ts
 = Tokenizers
 { tokenizers_prefix :: Map Mod_Path (Map TeName (Tokenizer Unifix src ts))
 , tokenizers_infix :: Map Mod_Path (Map TeName (Tokenizer Infix src ts))
 , tokenizers_postfix :: Map Mod_Path (Map TeName (Tokenizer Unifix src ts))
 }
deriving instance
 ( Show (Tokenizer Unifix src ts)
 , Show (Tokenizer Infix src ts)
 ) => Show (Tokenizers src ts)
instance Semigroup (Tokenizers src ts) where
  x <> y =
    Tokenizers
     (Map.unionWith Map.union
       (tokenizers_prefix x)
       (tokenizers_prefix y))
     (Map.unionWith Map.union
       (tokenizers_infix x)
       (tokenizers_infix y))
     (Map.unionWith Map.union
       (tokenizers_postfix x)
       (tokenizers_postfix y))
instance Monoid (Tokenizers src ts) where
  mempty = Tokenizers Map.empty Map.empty Map.empty
  mappend = (<>)
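-- Merging with ('<>') unions the 'Map's per fixity position:
-- @tokenizers_prefix (x <> y)@ contains the prefix tokenizers of both
-- @x@ and @y@, @x@ winning on duplicate names ('Map.union' is left-biased).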
-- ** Type 'Tokenize'
type Tokenize src ts = TokenizeR src ts ts

tokenizers :: forall src ts. Tokenize src ts => Tokenizers src ts
tokenizers = tokenizeR (Proxy @ts)
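-- For instance, @tokenizers \@() \@'[Proxy (->), Proxy Integer]@ merges,
-- via 'TokenizeR', the 'Tokenizers' declared by the 'TokenizeT' instances
-- of each type in the given list.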
-- | Return the 'EToken' of a complete 'ProTok', or an 'Error_Term_Gram'.
unProTok
 :: ProTok src ts
 -> Either Error_Term_Gram (EToken src ts)
unProTok (ProTokTe t) = Right t
unProTok (ProTokTy _) = Left $ Error_Term_Gram_Type_applied_to_nothing
unProTok _tok = Left $ Error_Term_Gram_Term_incomplete
-- | Lookup the given 'Mod' 'TeName' in the given 'Tokenizers',
-- returning the matching 'Tokenizer' (if any) for each of the
-- prefix, infix and postfix positions.
tokenizer_lookup
 :: Inj_Token src ts (->)
 => Mod TeName
 -> Tokenizers src ts
 -> ( Maybe (Tokenizer Unifix src ts)
    , Maybe (Tokenizer Infix src ts)
    , Maybe (Tokenizer Unifix src ts)
    )
tokenizer_lookup mn@(mod `Mod` n) (Tokenizers pres ins posts) = do
  let pre = Map.lookup mod pres >>= Map.lookup n
  let post = Map.lookup mod posts >>= Map.lookup n
  let in_ =
        case mn of
         [] `Mod` " " -> Just Tokenizer
          { tokenizer = \src -> inj_EToken src Token_Term_App
          , tokenizer_fixity = Infix (Just AssocL) 9
          }
         _ -> Map.lookup mod ins >>= Map.lookup n
  (pre, in_, post)
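-- For instance, @(["Bool"] `Mod` "not")@ is looked up in the prefix, infix
-- and postfix maps in turn (the @Bool@ lib naming is illustrative); only the
-- name @([] `Mod` " ")@ is hard-wired, to the implicit application operator.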
-- | Apply a proto-token to another one.
--
-- This is required because polymorphic types are not implemented (yet);
-- therefore tokens for polymorphic types must be given enough 'EToken's
-- to make them monomorphic types.
protok_app
 :: Inj_Token src ts (->)
 => ProTok src ts
 -> ProTok src ts
 -> Either Error_Term_Gram (ProTok src ts)
protok_app (ProTokLam f) (ProTokTe a) = Right $ f a
protok_app (ProTokPi f) (ProTokTy a) = Right $ f a
protok_app (ProTokTe f) (ProTokTe a) =
  Right $ ProTokTe $ inj_EToken (meta_of f) $ -- TODO: merge (meta_of a)
    Token_Term_App f a
protok_app ProTokApp f@ProTokLam{} = Right f
protok_app ProTokApp f@ProTokPi{} = Right f
protok_app ProTokApp (ProTokTe f) =
  Right $ ProTokLam $ \a ->
    ProTokTe $ inj_EToken (meta_of f) $ -- TODO: merge (meta_of a)
      Token_Term_App f a
protok_app ProTokLam{} _ = Left $ Error_Term_Gram_application_mismatch
protok_app ProTokPi{} _ = Left $ Error_Term_Gram_application_mismatch
protok_app ProTokTe{} _ = Left $ Error_Term_Gram_not_applicable
protok_app ProTokTy{} _ = Left $ Error_Term_Gram_not_applicable
protok_app ProTokApp{} _ = Left $ Error_Term_Gram_application
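-- For instance, applying a 'ProTokLam' to a term token runs its
-- continuation; applying it to a type instead is rejected with
-- 'Error_Term_Gram_application_mismatch', and applying a type ('ProTokTy')
-- to anything is rejected with 'Error_Term_Gram_not_applicable'.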
-- ** Class 'TokenizeR'
class TokenizeR src (ts::[*]) (rs::[*]) where
  tokenizeR :: Proxy rs -> Tokenizers src ts
instance TokenizeR src ts '[] where
  tokenizeR _rs = mempty
instance
 ( TokenizeT src ts t
 , TokenizeR src ts rs
 ) => TokenizeR src ts (t ': rs) where
  tokenizeR _rs =
    tokenizeR (Proxy @rs) <>
    tokenizeT (Proxy @t)
-- ** Class 'TokenizeT'
class TokenizeT src ts t where
  tokenizeT :: Proxy t -> Tokenizers src ts
  -- tokenizeT _t = [] `Mod` []
  tokenizeT _t = mempty

tokenizeTMod
 :: Mod_Path
 -> [(TeName, Tokenizer fix src ts)]
 -> Map Mod_Path (Map TeName (Tokenizer fix src ts))
tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl
tokenize0
 :: Inj_Token src ts t
 => Text -> fixity -> TokenT src ts (Proxy t)
 -> (TeName, Tokenizer fixity src ts)
tokenize0 n tokenizer_fixity tok =
  (TeName n,) Tokenizer
   { tokenizer = \src -> inj_EToken src tok
   , tokenizer_fixity
   }
tokenize1
 :: Inj_Token src ts t
 => Text -> fixity
 -> (EToken src ts -> TokenT src ts (Proxy t))
 -> (TeName, Tokenizer fixity src ts)
tokenize1 n tokenizer_fixity tok =
  (TeName n,) Tokenizer
   { tokenizer = \src ->
      ProTokLam $ \a ->
      ProTokTe $ inj_EToken src $ tok a
   , tokenizer_fixity
   }
tokenize2
 :: Inj_Token src ts t
 => Text -> fixity
 -> (EToken src ts -> EToken src ts -> TokenT src ts (Proxy t))
 -> (TeName, Tokenizer fixity src ts)
tokenize2 n tokenizer_fixity tok =
  (TeName n,) Tokenizer
   { tokenizer = \src ->
      ProTokLam $ \a -> ProTokLam $ \b ->
      ProTokTe $ inj_EToken src $ tok a b
   , tokenizer_fixity
   }
tokenize3
 :: Inj_Token src ts t
 => Text -> fixity
 -> (EToken src ts -> EToken src ts -> EToken src ts -> TokenT src ts (Proxy t))
 -> (TeName, Tokenizer fixity src ts)
tokenize3 n tokenizer_fixity tok =
  (TeName n,) Tokenizer
   { tokenizer = \src ->
      ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
      ProTokTe $ inj_EToken src $ tok a b c
   , tokenizer_fixity
   }
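-- A minimal sketch of a 'TokenizeT' instance; @Token_Term_Bool_Not@ and
-- @prefixN@ are hypothetical names standing for a lib's token constructor
-- and its 'Unifix' value, not definitions of this module:
--
-- @
-- instance TokenizeT src ts (Proxy Bool) where
--   tokenizeT _t = mempty
--    { tokenizers_prefix = tokenizeTMod []
--       [ tokenize1 "not" prefixN Token_Term_Bool_Not ]
--    }
-- @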
-- * Class 'Gram_Name'
 ) => Gram_Name g where
  g_mod_path :: CF g Mod_Path
  g_mod_path = rule "mod_path" $
    infixrG
     (pure <$> g_mod_name)
     (op <$ char '.')
    where op = (<>)
  g_mod_name :: CF g Mod_Name
  g_mod_name = rule "mod_name" $
    (Mod_Name . Text.pack <$>) $
    (identG `minus`) $
    Fun.const
     <$> g_term_keywords
     <*. (any `but` g_term_idname_tail)
    where
    identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
    headG = unicat $ Unicat Char.UppercaseLetter
  g_term_mod_name :: CF g (Mod TeName)
  g_term_mod_name = rule "term_mod_name" $
    g_term_mod_idname <+>
    parens g_term_mod_opname
  g_term_name :: CF g TeName
  g_term_name = rule "term_name" $
    g_term_idname <+>
    parens g_term_opname
  g_term_mod_idname :: CF g (Mod TeName)
  g_term_mod_idname = rule "term_mod_idname" $
    Mod
     <$> option [] (try $ g_mod_path <* char '.')
     <*> g_term_idname
  g_term_idname :: CF g TeName
  g_term_idname = rule "term_idname" $
    (TeName . Text.pack <$>) $
    (identG `minus`) $
    Fun.const
     <$> g_term_keywords
     <*. (any `but` g_term_idname_tail)
    where
    identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
    headG = unicat Unicat_Letter
  g_term_idname_tail :: Terminal g Char
  g_term_idname_tail = rule "term_idname_tail" $
    unicat Unicat_Letter <+>
    unicat Unicat_Number
  g_term_keywords :: Reg rl g String
  g_term_keywords = rule "term_keywords" $
    choice $ string <$> ["in", "let"]
  g_term_mod_opname :: CF g (Mod TeName)
  g_term_mod_opname = rule "term_mod_opname" $
    Mod
     <$> option [] (try $ g_mod_path <* char '.')
     <*> g_term_opname
  g_term_opname :: CF g TeName
  g_term_opname = rule "term_opname" $
    (TeName . Text.pack <$>) $
    (symG `minus`) $
    Fun.const
     <$> g_term_keysyms
     <*. (any `but` g_term_opname_ok)
    where
    symG = some $ cf_of_Terminal g_term_opname_ok
  g_term_opname_ok :: Terminal g Char
  g_term_opname_ok = rule "term_opname_ok" $
    choice (unicat <$>
     [ Unicat_Symbol
     , Unicat_Punctuation
     , Unicat_Mark
     ]) `but` koG
    where
    koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
  g_term_keysyms :: Reg rl g String
  g_term_keysyms = rule "term_keysyms" $
    choice $ string <$> ["\\", "->", "=", "@"]
deriving instance Gram_Name g => Gram_Name (CF g)
instance Gram_Name EBNF
instance Gram_Name RuleDef
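-- For instance, 'g_mod_path' parses @Foo.Bar@ into
-- @['Mod_Name' "Foo", 'Mod_Name' "Bar"]@, and 'g_term_mod_name' accepts
-- both identifiers like @Foo.Bar.f@ and parenthesized operator names,
-- keywords and key-symbols being subtracted by 'minus'.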
-- * Class 'Gram_Term_Type'
 ) => Gram_Term_Type src g where
  g_term_abst_decl
   :: CF g (TeName, AST_Type src)
  g_term_abst_decl = rule "term_abst_decl" $
    parens $ (,)
     <$> g_term_name
     <*  symbol ":"
     <*> g_type
deriving instance Gram_Term_Type src g => Gram_Term_Type src (CF g)
instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src EBNF
instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src RuleDef
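-- 'g_term_abst_decl' parses a single typed binding of an abstraction,
-- e.g. @(x:ty)@ in @\(x:ty) -> …@, returning the bound 'TeName' together
-- with the 'AST_Type' of its annotation.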
-- * Class 'Gram_Error'
class Gram_Error g where
  errorG :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
  errorG (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
  errorG (CF (RuleDef (EBNF g))) =
    CF $ RuleDef $ EBNF g
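-- 'errorG' turns a grammar of @Either 'Error_Term_Gram' a@ into a grammar
-- of @a@ whose parser fails on 'Left'; the 'EBNF' and 'RuleDef' instances
-- merely rewrap, since those interpreters pretty-print the grammar and
-- never run its semantic actions.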
-- ** Type 'Error_Term_Gram'
data Error_Term_Gram
 = Error_Term_Gram_Fixity Error_Fixity
 | Error_Term_Gram_Term_incomplete
 | Error_Term_Gram_Type_applied_to_nothing
 | Error_Term_Gram_not_applicable
 | Error_Term_Gram_application
 | Error_Term_Gram_application_mismatch
 deriving (Eq, Show)
-- * Class 'Gram_Term'
class
 ( Gram_Error g
 , Gram_Term_AtomsR src ts ts g
 , Gram_Term_Type src g
 , Inj_Token src ts (->)
 ) => Gram_Term ts src g where
  tokenizers_get :: CF g (Tokenizers src ts -> a) -> CF g a
  tokenizers_put :: CF g (Tokenizers src ts, a) -> CF g a
  g_term
   :: Inj_Token src ts (->)
   => CF g (AST_Term src ts)
  g_term = rule "term" $
    choice
     [ try g_term_abst
     , g_term_operators
     , g_term_let
     ]
  g_term_operators
   :: Inj_Token src ts (->)
   => CF g (AST_Term src ts)
  g_term_operators = rule "term_operators" $
    errorG $
    left Error_Term_Gram_Fixity <$>
    g_ops
    where
    g_ops :: CF g (Either Error_Fixity (AST_Term src ts))
    g_ops = operators g_term_atom g_prefix g_infix g_postfix
    g_prefix :: CF g (Unifix, AST_Term src ts -> AST_Term src ts)
    g_infix :: CF g (Infix, AST_Term src ts -> AST_Term src ts -> AST_Term src ts)
    g_postfix :: CF g (Unifix, AST_Term src ts -> AST_Term src ts)
    g_prefix = errorG $ metaG $ tokenizers_get $ op_prefix <$> g_prefix_op
    g_infix = errorG $ metaG $ tokenizers_get $ op_infix <$> g_infix_op
    g_postfix = errorG $ metaG $ tokenizers_get $ op_postfix <$> g_postfix_op
    op_infix
     :: Mod TeName
     -> Tokenizers src ts
     -> src
     -> Either Error_Term_Gram
               (Infix, AST_Term src ts -> AST_Term src ts -> AST_Term src ts)
    op_infix name toks src = do
      let (_pre, in_, _post) = tokenizer_lookup name toks
      case in_ of
       Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
       Just p ->
        Right $ (tokenizer_fixity p,) $ \a b ->
          (BinTree0 (tokenizer p src) `BinTree1` a) `BinTree1` b
    op_prefix, op_postfix
     :: Mod TeName
     -> Tokenizers src ts
     -> src
     -> Either Error_Term_Gram
               ( Unifix
               , AST_Term src ts -> AST_Term src ts )
    op_prefix name toks src = do
      let (pre, _in_, _post) = tokenizer_lookup name toks
      case pre of
       Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
       Just p ->
        Right $ (tokenizer_fixity p,) $ \a ->
          BinTree0 (tokenizer p src) `BinTree1` a
    op_postfix name toks src = do
      let (_pre, _in_, post) = tokenizer_lookup name toks
      case post of
       Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
       Just p ->
        Right $ (tokenizer_fixity p,) $ \a ->
          BinTree0 (tokenizer p src) `BinTree1` a
    g_postfix_op :: CF g (Mod TeName)
    g_postfix_op = rule "term_op_postfix" $
      g_backquote *> g_term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
      g_term_mod_opname
    g_infix_op :: CF g (Mod TeName)
    g_infix_op = rule "term_op_infix" $
      between g_backquote g_backquote g_term_mod_idname <+>
      try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
      pure ([] `Mod` " ")
    g_prefix_op :: CF g (Mod TeName)
    g_prefix_op = rule "term_op_prefix" $
      g_term_mod_idname <* g_backquote <+>
      g_term_mod_opname
    g_backquote :: Gram_Terminal g' => g' Char
    g_backquote = char '`'
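    -- Operator spelling: an identifier is used infix between backquotes
    -- (@`div`@), prefix with a trailing backquote (@div`@) and postfix with
    -- a leading backquote (@`div@); symbolic names are written bare, and
    -- plain juxtaposition is parsed as the implicit application operator.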
  g_term_atom
   :: Inj_Token src ts (->)
   => CF g (AST_Term src ts)
  g_term_atom = rule "term_atom" $
    choice $
    ((try $ metaG $
      (\typ src -> BinTree0 $ inj_EToken src $ Token_Term_Type typ)
       <$ char '@' <*> g_type) :) $
    (try . (BinTree0 <$>) <$> gs_term_atomsR (Proxy @ts)) <>
    [ try $
      errorG $ metaG $ tokenizers_get $
      (\mn toks src -> do
        let (_, in_, _) = tokenizer_lookup mn toks
        case in_ of
         Just p -> Right $ BinTree0 $ tokenizer p src
         Nothing ->
          case mn of
           [] `Mod` n -> Right $ BinTree0 $ inj_EToken src $ Token_Term_Var n
           _ -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
      ) <$> g_term_mod_name
    , g_term_group
    ]
  g_term_group
   :: Inj_Token src ts (->)
   => CF g (AST_Term src ts)
  g_term_group = rule "term_group" $ parens g_term
  g_term_abst
   :: Inj_Token src ts (->)
   => CF g (AST_Term src ts)
  g_term_abst = rule "term_abst" $
    metaG $
    ((\(xs, te) src ->
      foldr (\(x, ty_x) ->
        BinTree0 . inj_EToken src .
        Token_Term_Abst x ty_x) te xs) <$>) $
    g_term_abst_args_body
     (symbol "\\" *> some g_term_abst_decl <* symbol "->")
     g_term
  g_term_abst_args_body
   :: CF g [(TeName, AST_Type src)]
   -> CF g (AST_Term src ts)
   -> CF g ([(TeName, AST_Type src)], AST_Term src ts)
  -- g_term_abst_args_body args body = (,) <$> args <*> body
  g_term_abst_args_body cf_args cf_body =
    tokenizers_put $ tokenizers_get $
    (\a b (toks::Tokenizers src ts) -> (toks, (a, b)))
     <$> (tokenizers_put $ tokenizers_get $
      (\args (toks::Tokenizers src ts) -> (,args)
        toks
         { tokenizers_prefix = del (tokenizers_prefix toks) args
         , tokenizers_infix = ins (tokenizers_infix toks) args
         , tokenizers_postfix = del (tokenizers_postfix toks) args
         }) <$> cf_args)
     <*> cf_body
    where
    del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
    ins = foldr $ \(n, _) ->
      Map.insertWith (<>) [] $
      Map.singleton n
      Tokenizer
       { tokenizer = \src -> inj_EToken src $ Token_Term_Var n
       , tokenizer_fixity = infixN5
       }
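  -- While the body is parsed, each bound name is removed from the prefix
  -- and postfix 'Tokenizers' and inserted as an infix one (at 'infixN5')
  -- producing a 'Token_Term_Var', so it shadows any like-named operator;
  -- 'tokenizers_put' then restores the outer 'Tokenizers'.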
  g_term_let
   :: Inj_Token src ts (->)
   => CF g (AST_Term src ts)
  g_term_let = rule "term_let" $
    metaG $
    (\name args bound body src ->
      BinTree0 $ inj_EToken src $
      Token_Term_Let name
       (foldr (\(x, ty_x) ->
         BinTree0 . inj_EToken src .
         Token_Term_Abst x ty_x) bound args) body)
     <$  symbol "let"
     <*> g_term_name
     <*> many g_term_abst_decl
     <*  symbol "="
     <*> g_term
     <*  symbol "in"
     <*> g_term
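-- For instance, @let f (x:ty) = b in t@ binds @f@ to @\(x:ty) -> b@
-- within @t@: the declared arguments are folded into 'Token_Term_Abst'
-- around the bound term, exactly as in 'g_term_abst'.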
deriving instance
 ( Gram_Term ts src g
 , Gram_Term_AtomsR src ts ts (CF g)
 ) => Gram_Term ts src (CF g)
instance
 ( Gram_Term_AtomsR src ts ts EBNF
 , Inj_Token src ts (->)
 , Inj_Source (Text_of_Source src) src
 ) => Gram_Term ts src EBNF where
  tokenizers_get (CF (EBNF g)) = CF $ EBNF g
  tokenizers_put (CF (EBNF g)) = CF $ EBNF g
instance
 ( Gram_Term_AtomsR src ts ts RuleDef
 , Inj_Token src ts (->)
 , Inj_Source (Text_of_Source src) src
 ) => Gram_Term ts src RuleDef where
  tokenizers_get (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
  tokenizers_put (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
-- ** Class 'Gram_Term_AtomsR'
class Gram_Term_AtomsR src (ts::[*]) (rs::[*]) g where
  gs_term_atomsR :: Proxy rs -> [CF g (EToken src ts)]
instance Gram_Term_AtomsR src ts '[] g where
  gs_term_atomsR _rs = []
instance
 ( Gram_Term_AtomsT src ts t g
 , Gram_Term_AtomsR src ts rs g
 ) => Gram_Term_AtomsR src ts (t ': rs) g where
  gs_term_atomsR _rs =
    gs_term_atomsT (Proxy @t) <>
    gs_term_atomsR (Proxy @rs)

-- ** Class 'Gram_Term_AtomsT'
class Gram_Term_AtomsT src ts t g where
  gs_term_atomsT :: Proxy t -> [CF g (EToken src ts)]
  gs_term_atomsT _t = []
-- instance Gram_Term_AtomsT src ts t RuleDef
gram_term
 :: ( Gram_Term '[Proxy (->), Proxy Integer] () g
    ) => [CF g ()]
gram_term =
 [ voiD g_term
 , voiD g_term_operators
 , voiD g_term_atom
 , voiD g_term_group
 , voiD g_term_abst
 , void (g_term_abst_decl::CF g (TeName, AST_Type ()))
 , voiD g_term_let
 , void g_term_mod_name
 , void g_term_name
 , void g_term_idname
 , void $ cf_of_Terminal g_term_idname_tail
 , void $ cf_of_Reg g_term_keywords
 , void g_term_mod_opname
 , void g_term_opname
 , void $ cf_of_Terminal g_term_opname_ok
 , void $ cf_of_Reg g_term_keysyms
 ] where
 voiD :: CF g (AST_Term () '[Proxy (->), Proxy Integer]) -> CF g ()
 voiD = void
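-- Interpreted at @g ~ 'EBNF'@ (or 'RuleDef'), this list can be traversed to
-- render the whole term grammar, e.g. as EBNF documentation; the rendering
-- helpers are assumed to live with the 'EBNF' interpreter, not here.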