1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.Symantic.Compiling.Term.Grammar where
10 import Control.Arrow (left)
11 import Control.Monad (foldM, void, (=<<))
12 import qualified Data.Char as Char
13 import qualified Data.Function as Fun
14 import Data.Map.Strict (Map)
15 import qualified Data.Map.Strict as Map
16 import Data.Proxy (Proxy(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.String (IsString(..))
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Prelude hiding (mod, not, any)
23 import Language.Symantic.Parsing
24 import Language.Symantic.Typing
27 newtype TeName = TeName Text
28 deriving (Eq, Ord, Show)
29 instance IsString TeName where
30 fromString = TeName . fromString
33 -- | Proto 'EToken'. It's almost like a free monad,
34 -- but with more constructors to avoid the need
35 -- for sub constructors when requiring a type rather than a term.
37 -- NOTE: this type may one day be removed
38 -- if proper polymorphic types are implemented.
39 -- In the meantime it is used to require
40 -- term or type arguments needed to build
41 -- the 'EToken's of polymorphic terms.
43 = ProTokLam (EToken meta ts -> ProTok meta ts)
44 -- ^ Require a term argument.
45 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
46 -- ^ Require a type argument.
47 | ProTokTe (EToken meta ts)
48 -- ^ Immediat term, no need for any argument.
49 | ProTokTy (EToken meta '[Proxy Token_Type])
50 -- ^ Immediat type, no need for any argument.
52 -- ^ Require a term argument to perform a 'Token_Term_App'.
54 -- Useful to make the term application symbolized by a space
55 -- be an operator, and thus be integrated with the other operators.
56 -- This enables prefix and postfix operators to be applied
57 -- before the term application.
58 instance (Show_Token meta ts, Show meta) => Show (ProTok meta ts) where
59 showsPrec n ProTokLam{} = showParen (n > 10) $ showString "ProTokLam"
60 showsPrec n ProTokPi{} = showParen (n > 10) $ showString "ProTokPi"
61 showsPrec n (ProTokTe tok) = showParen (n > 10) $ showString "ProTokTe " . showsPrec 11 tok
62 showsPrec n (ProTokTy typ) = showParen (n > 10) $ showString "ProTokTe " . showsPrec 11 typ
63 showsPrec n ProTokApp = showParen (n > 10) $ showString "ProTokApp"
65 -- | Declared here and not in @Compiling.Lambda@
66 -- to be able to use 'Token_Term_Var'.
67 data instance TokenT meta (ts::[*]) (Proxy (->))
68 = Token_Term_Abst TeName (EToken meta '[Proxy Token_Type]) (EToken meta ts)
69 | Token_Term_App (EToken meta ts) (EToken meta ts)
70 | Token_Term_Let TeName (EToken meta ts) (EToken meta ts)
71 | Token_Term_Var TeName
72 | Token_Term_Compose (EToken meta ts) (EToken meta ts)
76 = TokenizeR meta ts ts
78 -- ** Type 'Tokenizers'
79 data Tokenizers meta ts
81 { tokenizers_prefix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
82 , tokenizers_infix :: Map Mod_Path (Map TeName (ProTok_Term Infix meta ts))
83 , tokenizers_postfix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
86 ( Show (ProTok_Term Unifix meta ts)
87 , Show (ProTok_Term Infix meta ts)
88 ) => Show (Tokenizers meta ts)
89 instance Semigroup (Tokenizers meta ts) where
92 (Map.unionWith Map.union
94 (tokenizers_prefix y))
95 (Map.unionWith Map.union
98 (Map.unionWith Map.union
99 (tokenizers_postfix x)
100 (tokenizers_postfix y))
101 instance Monoid (Tokenizers meta ts) where
102 mempty = Tokenizers Map.empty Map.empty Map.empty
105 -- ** Type 'ProTok_Term'
106 data ProTok_Term fixy meta ts
108 { protok_term :: meta -> ProTok meta ts
109 , protok_fixity :: fixy
112 tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
113 tokenizers = tokenizeR (Proxy @ts)
117 -> Either Error_Term_Gram (EToken meta ts)
118 unProTok (ProTokTe t) = Right t
119 unProTok (ProTokTy _) = Left $ Error_Term_Gram_Type_applied_to_nothing
120 unProTok _tok = Left $ Error_Term_Gram_Term_incomplete
122 -- | Lookup the given 'Mod' 'TeName' into the given 'Tokenizers',
123 -- returning an error or a 'ProTok' constructor for prefix, infix and postfix positions.
125 :: Inj_Token meta ts (->)
127 -> Tokenizers meta ts
128 -> Either Error_Term_Gram
129 ( Maybe (ProTok_Term Unifix meta ts)
130 , Maybe (ProTok_Term Infix meta ts)
131 , Maybe (ProTok_Term Unifix meta ts)
133 protok_lookup mn@(mod `Mod` n) (Tokenizers pres ins posts) = do
134 let pre = Map.lookup mod pres >>= Map.lookup n
135 let post = Map.lookup mod posts >>= Map.lookup n
140 { protok_term = \_meta -> ProTokApp
141 , protok_fixity = Infix (Just AssocL) 9
143 _ -> Map.lookup mod ins >>= Map.lookup n
144 return (pre, in_, post)
146 -- | Apply a proto-token to another.
148 -- This is required because polymorphic types are not implemented (yet),
149 -- therefore tokens for polymorphic types must have enough 'EToken's
150 -- to make them monomorphic types.
152 :: Inj_Token meta ts (->)
155 -> Either Error_Term_Gram (ProTok meta ts)
156 protok_app (ProTokLam f) (ProTokTe a) = Right $ f a
157 protok_app (ProTokPi f) (ProTokTy a) = Right $ f a
158 protok_app (ProTokTe f) (ProTokTe a) =
159 Right $ ProTokTe $ inj_EToken (meta_of f) $ -- TODO: merge (meta_of a)
161 protok_app ProTokApp f@ProTokLam{} = Right f
162 protok_app ProTokApp f@ProTokPi{} = Right f
163 protok_app ProTokApp (ProTokTe f) =
164 Right $ ProTokLam $ \a ->
165 ProTokTe $ inj_EToken (meta_of f) $ -- TODO: merge (meta_of a)
167 protok_app ProTokLam{} _ = Left $ Error_Term_Gram_application_mismatch
168 protok_app ProTokPi{} _ = Left $ Error_Term_Gram_application_mismatch
169 protok_app ProTokTe{} _ = Left $ Error_Term_Gram_not_applicable
170 protok_app ProTokTy{} _ = Left $ Error_Term_Gram_not_applicable
171 protok_app ProTokApp{} _ = Left $ Error_Term_Gram_application
173 -- ** Class 'TokenizeR'
174 class TokenizeR meta (ts::[*]) (rs::[*]) where
175 tokenizeR :: Proxy rs -> Tokenizers meta ts
176 instance TokenizeR meta ts '[] where
177 tokenizeR _rs = mempty
179 ( TokenizeT meta ts t
180 , TokenizeR meta ts rs
181 ) => TokenizeR meta ts (t ': rs) where
183 tokenizeR (Proxy @rs) <>
186 -- ** Class 'TokenizeT'
187 class TokenizeT meta ts t where
188 tokenizeT :: Proxy t -> Tokenizers meta ts
189 -- tokenizeT _t = [] `Mod` []
190 tokenizeT _t = mempty
194 -> [(TeName, ProTok_Term fix meta ts)]
195 -> Map Mod_Path (Map TeName (ProTok_Term fix meta ts))
196 tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl
199 :: Inj_Token meta ts t
200 => Text -> fixity -> TokenT meta ts (Proxy t)
201 -> (TeName, ProTok_Term fixity meta ts)
202 tokenize0 n protok_fixity tok =
203 (TeName n,) ProTok_Term
204 { protok_term = \meta -> ProTokTe $ inj_EToken meta $ tok
208 :: Inj_Token meta ts t
210 -> (EToken meta ts -> TokenT meta ts (Proxy t))
211 -> (TeName, ProTok_Term fixity meta ts)
212 tokenize1 n protok_fixity tok =
213 (TeName n,) ProTok_Term
214 { protok_term = \meta ->
216 ProTokTe $ inj_EToken meta $ tok a
220 :: Inj_Token meta ts t
222 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
223 -> (TeName, ProTok_Term fixity meta ts)
224 tokenize2 n protok_fixity tok =
225 (TeName n,) ProTok_Term
226 { protok_term = \meta ->
227 ProTokLam $ \a -> ProTokLam $ \b ->
228 ProTokTe $ inj_EToken meta $ tok a b
233 :: Inj_Token meta ts t
235 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
236 -> (TeName, ProTok_Term fixity meta ts)
237 tokenize3 n protok_fixity tok =
238 (TeName n,) ProTok_Term
239 { protok_term = \meta ->
240 ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
241 ProTokTe $ inj_EToken meta $ tok a b c
246 type Mod_Path = [Mod_Name]
247 newtype Mod_Name = Mod_Name Text
248 deriving (Eq, Ord, Show)
249 data Mod a = Mod Mod_Path a
250 deriving (Eq, Functor, Ord, Show)
252 -- * Class 'Gram_Name'
265 ) => Gram_Name g where
266 g_mod_path :: CF g Mod_Path
267 g_mod_path = rule "mod_path" $
269 (pure <$> g_mod_name)
272 g_mod_name :: CF g Mod_Name
273 g_mod_name = rule "mod_name" $
274 (Mod_Name . Text.pack <$>) $
278 <*. (any `but` g_term_idname_tail)
280 identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
281 headG = unicat $ Unicat Char.UppercaseLetter
283 g_term_mod_name :: CF g (Mod TeName)
284 g_term_mod_name = rule "term_mod_name" $
286 g_term_mod_idname <+>
287 parens g_term_mod_opname
288 g_term_name :: CF g TeName
289 g_term_name = rule "term_name" $
294 g_term_mod_idname :: CF g (Mod TeName)
295 g_term_mod_idname = rule "term_mod_idname" $
297 <$> option [] (try $ g_mod_path <* char '.')
299 g_term_idname :: CF g TeName
300 g_term_idname = rule "term_idname" $
301 (TeName . Text.pack <$>) $
305 <*. (any `but` g_term_idname_tail)
307 identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
308 headG = unicat $ Unicat_Letter
309 g_term_idname_tail :: Terminal g Char
310 g_term_idname_tail = rule "term_idname_tail" $
311 unicat Unicat_Letter <+>
313 g_term_keywords :: Reg rl g String
314 g_term_keywords = rule "term_keywords" $
315 choice $ string <$> ["in", "let"]
317 g_term_mod_opname :: CF g (Mod TeName)
318 g_term_mod_opname = rule "term_mod_opname" $
320 <$> option [] (try $ g_mod_path <* char '.')
322 g_term_opname :: CF g TeName
323 g_term_opname = rule "term_opname" $
324 (TeName . Text.pack <$>) $
328 <*. (any `but` g_term_opname_ok)
330 symG = some $ cf_of_Terminal g_term_opname_ok
331 g_term_opname_ok :: Terminal g Char
332 g_term_opname_ok = rule "term_opname_ok" $
339 koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
340 g_term_keysyms :: Reg rl g String
341 g_term_keysyms = rule "term_keysyms" $
342 choice $ string <$> ["\\", "->", "=", "@"]
344 deriving instance Gram_Name g => Gram_Name (CF g)
345 instance Gram_Name EBNF
346 instance Gram_Name RuleDef
348 -- * Class 'Gram_Term_Type'
360 ) => Gram_Term_Type meta g where
362 :: CF g (TeName, TokType meta)
363 g_term_abst_decl = rule "term_abst_decl" $
369 deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
370 instance Gram_Term_Type meta EBNF
371 instance Gram_Term_Type meta RuleDef
373 -- * Class 'Gram_Error'
374 class Gram_Error g where
375 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
376 deriving instance Gram_Error g => Gram_Error (CF g)
377 instance Gram_Error EBNF where
378 term_unError (CF (EBNF g)) = CF $ EBNF g
379 instance Gram_Error RuleDef where
380 term_unError (CF (RuleDef (EBNF g))) =
381 CF $ RuleDef $ EBNF g
383 -- ** Type 'Error_Term_Gram'
385 = Error_Term_Gram_Fixity Error_Fixity
386 | Error_Term_Gram_Term_incomplete
387 | Error_Term_Gram_Type_applied_to_nothing
388 | Error_Term_Gram_not_applicable
389 | Error_Term_Gram_application
390 | Error_Term_Gram_application_mismatch
393 -- * Class 'Gram_Term'
404 , Gram_Term_AtomsR meta ts ts g
406 , Gram_Term_Type meta g
408 , Inj_Token meta ts (->)
409 ) => Gram_Term ts meta g where
410 tokenizers_get :: CF g (Tokenizers meta ts -> a) -> CF g a
411 tokenizers_put :: CF g (Tokenizers meta ts, a) -> CF g a
414 :: Inj_Tokens meta ts '[Proxy (->)]
415 => CF g (EToken meta ts)
416 g_term = rule "term" $
423 :: Inj_Tokens meta ts '[Proxy (->)]
424 => CF g (EToken meta ts)
425 g_term_operators = rule "term_operators" $
427 ((unProTok =<<) <$>) $
429 left Error_Term_Gram_Fixity <$>
432 ops :: CF g (Either Error_Fixity
433 (Either Error_Term_Gram (ProTok meta ts)))
436 (Right <$> g_term_atom)
437 (term_unError $ metaG $ tokenizers_get $ op_prefix <$> g_term_op_prefix)
438 (term_unError $ metaG $ tokenizers_get $ op_infix <$> g_term_op_infix)
439 (term_unError $ metaG $ tokenizers_get $ op_postfix <$> g_term_op_postfix)
442 -> Tokenizers meta ts
444 -> Either Error_Term_Gram
446 , Either Error_Term_Gram (ProTok meta ts)
447 -> Either Error_Term_Gram (ProTok meta ts)
448 -> Either Error_Term_Gram (ProTok meta ts) )
449 op_infix name toks meta = do
450 (_pre, in_, _post) <- protok_lookup name toks
452 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
454 Right $ (protok_fixity p,) $ \ma mb -> do
457 foldM protok_app (protok_term p meta) [a, b]
458 op_prefix, op_postfix
460 -> Tokenizers meta ts
462 -> Either Error_Term_Gram
464 , Either Error_Term_Gram (ProTok meta ts)
465 -> Either Error_Term_Gram (ProTok meta ts) )
466 op_prefix name toks meta = do
467 (pre, _in_, _post) <- protok_lookup name toks
469 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
471 Right $ (protok_fixity p,) $ \ma -> do
473 protok_term p meta `protok_app` a
474 op_postfix name toks meta = do
475 (_pre, _in_, post) <- protok_lookup name toks
477 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
479 Right $ (protok_fixity p,) $ \ma -> do
481 protok_term p meta `protok_app` a
482 g_term_op_postfix :: CF g (Mod TeName)
483 g_term_op_postfix = rule "term_op_postfix" $
485 g_backquote *> g_term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
487 g_term_op_infix :: CF g (Mod TeName)
488 g_term_op_infix = rule "term_op_infix" $
490 between g_backquote g_backquote g_term_mod_idname <+>
491 try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
493 g_term_op_prefix :: CF g (Mod TeName)
494 g_term_op_prefix = rule "term_op_prefix" $
496 g_term_mod_idname <* g_backquote <+>
498 g_backquote :: Gram_Terminal g' => g' Char
499 g_backquote = char '`'
502 :: Inj_Tokens meta ts '[Proxy (->)]
503 => CF g (ProTok meta ts)
504 g_term_atom = rule "term_atom" $
506 (try (ProTokTy <$ char '@' <*> g_type) :) $
507 try <$> gs_term_atomsR (Proxy @ts) <>
509 metaG $ term_unError $ tokenizers_get $
511 (_, in_, _) <- protok_lookup mn toks
515 [] `Mod` n -> Right $ \meta ->
516 ProTokTe $ inj_EToken meta $ Token_Term_Var n
517 _ -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
518 Just p -> Right $ protok_term p
519 ) <$> g_term_mod_name
520 , ProTokTe <$> g_term_group
523 :: Inj_Tokens meta ts '[Proxy (->)]
524 => CF g (EToken meta ts)
525 g_term_group = rule "term_group" $ parens g_term
527 :: Inj_Tokens meta ts '[Proxy (->)]
528 => CF g (EToken meta ts)
529 g_term_abst = rule "term_abst" $
534 Token_Term_Abst x ty_x) te xs) <$>) $
535 g_term_abst_args_body
536 (symbol "\\" *> some g_term_abst_decl <* symbol "->")
538 g_term_abst_args_body
539 :: CF g [(TeName, TokType meta)]
540 -> CF g (EToken meta ts)
541 -> CF g ([(TeName, TokType meta)], EToken meta ts)
542 -- g_term_abst_args_body args body = (,) <$> args <*> body
543 g_term_abst_args_body cf_args cf_body =
544 tokenizers_put $ tokenizers_get $
545 (\a b (toks::Tokenizers meta ts) -> (toks, (a, b)))
546 <$> (tokenizers_put $ tokenizers_get $
547 (\args (toks::Tokenizers meta ts) -> (,args)
549 { tokenizers_prefix = del (tokenizers_prefix toks) args
550 , tokenizers_infix = ins (tokenizers_infix toks) args
551 , tokenizers_postfix = del (tokenizers_postfix toks) args
555 del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
556 ins = foldr $ \(n, _) ->
557 Map.insertWith (<>) [] $
560 { protok_term = \meta -> ProTokTe $ inj_EToken meta $ Token_Term_Var n
561 , protok_fixity = infixN5
564 :: Inj_Tokens meta ts '[Proxy (->)]
565 => CF g (EToken meta ts)
566 g_term_let = rule "term_let" $
568 (\name args bound body meta ->
571 (foldr (\(x, ty_x) ->
572 inj_EToken meta . Token_Term_Abst x ty_x) bound args) body)
575 <*> many g_term_abst_decl
582 ( Gram_Term ts meta g
583 , Gram_Term_AtomsR meta ts ts (CF g)
584 ) => Gram_Term ts meta (CF g)
586 ( Gram_Term_AtomsR meta ts ts EBNF
587 , Inj_Token meta ts (->)
588 ) => Gram_Term ts meta EBNF where
589 tokenizers_get (CF (EBNF g)) = CF $ EBNF g
590 tokenizers_put (CF (EBNF g)) = CF $ EBNF g
592 ( Gram_Term_AtomsR meta ts ts RuleDef
593 , Inj_Token meta ts (->)
594 ) => Gram_Term ts meta RuleDef where
595 tokenizers_get (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
596 tokenizers_put (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
598 -- ** Class 'Gram_Term_AtomsR'
599 class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
600 gs_term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
601 instance Gram_Term_AtomsR meta ts '[] g where
602 gs_term_atomsR _rs = []
604 ( Gram_Term_AtomsT meta ts t g
605 , Gram_Term_AtomsR meta ts rs g
606 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
608 gs_term_atomsT (Proxy @t) <>
609 gs_term_atomsR (Proxy @rs)
611 -- ** Class 'Gram_Term_AtomsT'
612 class Gram_Term_AtomsT meta ts t g where
613 gs_term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
614 gs_term_atomsT _t = []
615 -- instance Gram_Term_AtomsT meta ts t RuleDef
619 ( Gram_Term '[Proxy (->), Proxy Integer] () g
623 , ue g_term_operators
627 , void (g_term_abst_decl::CF g (TeName, TokType ()))
629 , void g_term_mod_name
632 , void $ cf_of_Terminal g_term_idname_tail
633 , void $ cf_of_Reg g_term_keywords
634 , void g_term_mod_opname
636 , void $ cf_of_Terminal g_term_opname_ok
637 , void $ cf_of_Reg g_term_keysyms
639 up :: CF g (ProTok () '[Proxy (->), Proxy Integer]) -> CF g ()
641 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()