1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.Symantic.Compiling.Term.Grammar where
10 import Control.Arrow (left)
11 import Control.Monad (foldM, void, (=<<))
12 import qualified Data.Char as Char
13 import qualified Data.Function as Fun
14 import Data.Map.Strict (Map)
15 import qualified Data.Map.Strict as Map
16 import Data.Proxy (Proxy(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.String (IsString(..))
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Prelude hiding (mod, not, any)
23 import Language.Symantic.Parsing
24 import Language.Symantic.Typing
27 newtype TeName = TeName Text
28 deriving (Eq, Ord, Show)
29 instance IsString TeName where
30 fromString = TeName . fromString
33 -- | Proto 'EToken'. It's almost like a free monad,
34 -- but with more constructors to avoid the need
35 -- for sub constructors when requiring a type rather than a term.
37 -- NOTE: this type may one day be removed
38 -- if proper polymorphic types are implemented.
39 -- In the meantime it is used to require
40 -- term or type arguments needed to build
41 -- the 'EToken's of polymorphic terms.
43 = ProTokLam (EToken meta ts -> ProTok meta ts)
44 -- ^ Require a term argument.
45 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
46 -- ^ Require a type argument.
47 | ProTokTe (EToken meta ts)
48 -- ^ Immediat term, no need for any argument.
49 | ProTokTy (EToken meta '[Proxy Token_Type])
50 -- ^ Immediat type, no need for any argument.
52 -- ^ Require a term argument to perform a 'Token_Term_App'.
54 -- Useful to make the term application symbolized by a space
55 -- be an operator, and thus be integrated with the other operators.
56 -- This enables prefix and postfix operators to be applied
57 -- before the term application.
58 instance (Show_Token meta ts, Show meta) => Show (ProTok meta ts) where
59 showsPrec n ProTokLam{} = showParen (n > 10) $ showString "ProTokLam"
60 showsPrec n ProTokPi{} = showParen (n > 10) $ showString "ProTokPi"
61 showsPrec n (ProTokTe tok) = showParen (n > 10) $ showString "ProTokTe " . showsPrec 11 tok
62 showsPrec n (ProTokTy typ) = showParen (n > 10) $ showString "ProTokTe " . showsPrec 11 typ
63 showsPrec n ProTokApp = showParen (n > 10) $ showString "ProTokApp"
65 -- | Declared here and not in @Compiling.Lambda@
66 -- to be able to use 'Token_Term_Var'.
67 data instance TokenT meta (ts::[*]) (Proxy (->))
68 = Token_Term_Abst TeName (EToken meta '[Proxy Token_Type]) (EToken meta ts)
69 | Token_Term_App (EToken meta ts) (EToken meta ts)
70 | Token_Term_Let TeName (EToken meta ts) (EToken meta ts)
71 | Token_Term_Var TeName
72 | Token_Term_Compose (EToken meta ts) (EToken meta ts)
76 = TokenizeR meta ts ts
78 -- ** Type 'Tokenizers'
79 data Tokenizers meta ts
81 { tokenizers_prefix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
82 , tokenizers_infix :: Map Mod_Path (Map TeName (ProTok_Term Infix meta ts))
83 , tokenizers_postfix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
86 ( Show (ProTok_Term Unifix meta ts)
87 , Show (ProTok_Term Infix meta ts)
88 ) => Show (Tokenizers meta ts)
89 instance Semigroup (Tokenizers meta ts) where
92 (Map.unionWith Map.union
94 (tokenizers_prefix y))
95 (Map.unionWith Map.union
98 (Map.unionWith Map.union
99 (tokenizers_postfix x)
100 (tokenizers_postfix y))
101 instance Monoid (Tokenizers meta ts) where
102 mempty = Tokenizers Map.empty Map.empty Map.empty
105 -- ** Type 'ProTok_Term'
106 data ProTok_Term fixy meta ts
108 { protok_term :: meta -> ProTok meta ts
109 , protok_fixity :: fixy
112 tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
113 tokenizers = tokenizeR (Proxy @ts)
117 -> Either Error_Term_Gram (EToken meta ts)
118 unProTok (ProTokTe t) = Right t
119 unProTok (ProTokTy _) = Left $ Error_Term_Gram_Type_applied_to_nothing
120 unProTok _tok = Left $ Error_Term_Gram_Term_incomplete
122 -- | Lookup the given 'Mod' 'TeName' into the given 'Tokenizers',
123 -- returning an error or a 'ProTok' constructor for prefix, infix and postfix positions.
125 :: Inj_Token meta ts (->)
127 -> Tokenizers meta ts
128 -> Either Error_Term_Gram
129 ( Maybe (ProTok_Term Unifix meta ts)
130 , Maybe (ProTok_Term Infix meta ts)
131 , Maybe (ProTok_Term Unifix meta ts)
133 protok_lookup mn@(mod `Mod` n) (Tokenizers pres ins posts) = do
134 let pre = Map.lookup mod pres >>= Map.lookup n
135 let post = Map.lookup mod posts >>= Map.lookup n
140 { protok_term = \_meta -> ProTokApp
141 , protok_fixity = Infix (Just AssocL) 9
143 _ -> Map.lookup mod ins >>= Map.lookup n
144 return (pre, in_, post)
146 -- | Apply a proto-token to another.
148 -- This is required because polymorphic types are not implemented (yet),
149 -- therefore tokens for polymorphic types must have enough 'EToken's
150 -- to make them monomorphic types.
152 :: Inj_Token meta ts (->)
155 -> Either Error_Term_Gram (ProTok meta ts)
156 protok_app (ProTokLam f) (ProTokTe a) = Right $ f a
157 protok_app (ProTokPi f) (ProTokTy a) = Right $ f a
158 protok_app (ProTokTe f) (ProTokTe a) =
159 Right $ ProTokTe $ inj_EToken (meta_of f) $ -- TODO: merge (meta_of a)
161 protok_app ProTokApp f@ProTokLam{} = Right f
162 protok_app ProTokApp f@ProTokPi{} = Right f
163 protok_app ProTokApp (ProTokTe f) =
164 Right $ ProTokLam $ \a ->
165 ProTokTe $ inj_EToken (meta_of f) $ -- TODO: merge (meta_of a)
167 protok_app ProTokLam{} _ = Left $ Error_Term_Gram_application_mismatch
168 protok_app ProTokPi{} _ = Left $ Error_Term_Gram_application_mismatch
169 protok_app ProTokTe{} _ = Left $ Error_Term_Gram_not_applicable
170 protok_app ProTokTy{} _ = Left $ Error_Term_Gram_not_applicable
171 protok_app ProTokApp{} _ = Left $ Error_Term_Gram_application
173 -- ** Class 'TokenizeR'
174 class TokenizeR meta (ts::[*]) (rs::[*]) where
175 tokenizeR :: Proxy rs -> Tokenizers meta ts
176 instance TokenizeR meta ts '[] where
177 tokenizeR _rs = mempty
179 ( TokenizeT meta ts t
180 , TokenizeR meta ts rs
181 ) => TokenizeR meta ts (t ': rs) where
183 tokenizeR (Proxy @rs) <>
186 -- ** Class 'TokenizeT'
187 class TokenizeT meta ts t where
188 tokenizeT :: Proxy t -> Tokenizers meta ts
189 -- tokenizeT _t = [] `Mod` []
190 tokenizeT _t = mempty
194 -> [(TeName, ProTok_Term fix meta ts)]
195 -> Map Mod_Path (Map TeName (ProTok_Term fix meta ts))
196 tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl
199 :: Inj_Token meta ts t
200 => Text -> fixity -> TokenT meta ts (Proxy t)
201 -> (TeName, ProTok_Term fixity meta ts)
202 tokenize0 n protok_fixity tok =
203 (TeName n,) ProTok_Term
204 { protok_term = \meta -> ProTokTe $ inj_EToken meta $ tok
208 :: Inj_Token meta ts t
210 -> (EToken meta ts -> TokenT meta ts (Proxy t))
211 -> (TeName, ProTok_Term fixity meta ts)
212 tokenize1 n protok_fixity tok =
213 (TeName n,) ProTok_Term
214 { protok_term = \meta ->
216 ProTokTe $ inj_EToken meta $ tok a
220 :: Inj_Token meta ts t
222 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
223 -> (TeName, ProTok_Term fixity meta ts)
224 tokenize2 n protok_fixity tok =
225 (TeName n,) ProTok_Term
226 { protok_term = \meta ->
227 ProTokLam $ \a -> ProTokLam $ \b ->
228 ProTokTe $ inj_EToken meta $ tok a b
233 :: Inj_Token meta ts t
235 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
236 -> (TeName, ProTok_Term fixity meta ts)
237 tokenize3 n protok_fixity tok =
238 (TeName n,) ProTok_Term
239 { protok_term = \meta ->
240 ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
241 ProTokTe $ inj_EToken meta $ tok a b c
246 type Mod_Path = [Mod_Name]
247 newtype Mod_Name = Mod_Name Text
248 deriving (Eq, Ord, Show)
249 data Mod a = Mod Mod_Path a
250 deriving (Eq, Functor, Ord, Show)
252 -- * Class 'Gram_Name'
265 ) => Gram_Name g where
266 g_mod_path :: CF g Mod_Path
267 g_mod_path = rule "mod_path" $
269 (pure <$> g_mod_name)
272 g_mod_name :: CF g Mod_Name
273 g_mod_name = rule "mod_name" $
274 (Mod_Name . Text.pack <$>) $
278 <*. (any `but` g_term_idname_tail)
280 identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
281 headG = unicat $ Unicat Char.UppercaseLetter
283 g_term_mod_name :: CF g (Mod TeName)
284 g_term_mod_name = rule "term_mod_name" $
286 g_term_mod_idname <+>
287 parens g_term_mod_opname
288 g_term_name :: CF g TeName
289 g_term_name = rule "term_name" $
294 g_term_mod_idname :: CF g (Mod TeName)
295 g_term_mod_idname = rule "term_mod_idname" $
297 <$> option [] (try $ g_mod_path <* char '.')
299 g_term_idname :: CF g TeName
300 g_term_idname = rule "term_idname" $
301 (TeName . Text.pack <$>) $
305 <*. (any `but` g_term_idname_tail)
307 identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
308 headG = unicat $ Unicat_Letter
309 g_term_idname_tail :: Terminal g Char
310 g_term_idname_tail = rule "term_idname_tail" $
311 unicat Unicat_Letter <+>
313 g_term_keywords :: Reg rl g String
314 g_term_keywords = rule "term_keywords" $
315 choice $ string <$> ["in", "let"]
317 g_term_mod_opname :: CF g (Mod TeName)
318 g_term_mod_opname = rule "term_mod_opname" $
320 <$> option [] (try $ g_mod_path <* char '.')
322 g_term_opname :: CF g TeName
323 g_term_opname = rule "term_opname" $
324 (TeName . Text.pack <$>) $
328 <*. (any `but` g_term_opname_ok)
330 symG = some $ cf_of_Terminal g_term_opname_ok
331 g_term_opname_ok :: Terminal g Char
332 g_term_opname_ok = rule "term_opname_ok" $
339 koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
340 g_term_keysyms :: Reg rl g String
341 g_term_keysyms = rule "term_keysyms" $
342 choice $ string <$> ["\\", "->", "=", "@"]
344 deriving instance Gram_Name g => Gram_Name (CF g)
345 instance Gram_Name EBNF
346 instance Gram_Name RuleDef
348 -- * Class 'Gram_Term_Type'
360 ) => Gram_Term_Type meta g where
362 :: CF g (TeName, TokType meta)
363 g_term_abst_decl = rule "term_abst_decl" $
369 deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
370 instance Gram_Term_Type meta EBNF
371 instance Gram_Term_Type meta RuleDef
373 -- * Class 'Gram_Error'
374 class Gram_Error g where
375 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
376 deriving instance Gram_Error g => Gram_Error (CF g)
377 instance Gram_Error EBNF where
378 term_unError (CF (EBNF g)) = CF $ EBNF g
379 instance Gram_Error RuleDef where
380 term_unError (CF (RuleDef (EBNF g))) =
381 CF $ RuleDef $ EBNF g
383 -- ** Type 'Error_Term_Gram'
385 = Error_Term_Gram_Fixity Error_Fixity
386 | Error_Term_Gram_Cannot_apply_term
387 | Error_Term_Gram_Cannot_apply_type
388 | Error_Term_Gram_Undefined_term
389 | Error_Term_Gram_Term_incomplete
390 | Error_Term_Gram_Type_applied_to_nothing
391 | Error_Term_Gram_not_applicable
392 | Error_Term_Gram_application
393 | Error_Term_Gram_application_mismatch
396 -- * Class 'Gram_Term'
407 , Gram_Term_AtomsR meta ts ts g
409 , Gram_Term_Type meta g
411 , Inj_Token meta ts (->)
412 ) => Gram_Term ts meta g where
413 tokenizers_get :: CF g (Tokenizers meta ts -> a) -> CF g a
414 tokenizers_put :: CF g (Tokenizers meta ts, a) -> CF g a
417 :: Inj_Tokens meta ts '[Proxy (->)]
418 => CF g (EToken meta ts)
419 g_term = rule "term" $
426 :: Inj_Tokens meta ts '[Proxy (->)]
427 => CF g (EToken meta ts)
428 g_term_operators = rule "term_operators" $
430 ((unProTok =<<) <$>) $
432 left Error_Term_Gram_Fixity <$>
435 ops :: CF g (Either Error_Fixity
436 (Either Error_Term_Gram (ProTok meta ts)))
439 (Right <$> g_term_atom)
440 (term_unError $ metaG $ tokenizers_get $ op_prefix <$> g_term_op_prefix)
441 (term_unError $ metaG $ tokenizers_get $ op_infix <$> g_term_op_infix)
442 (term_unError $ metaG $ tokenizers_get $ op_postfix <$> g_term_op_postfix)
445 -> Tokenizers meta ts
447 -> Either Error_Term_Gram
449 , Either Error_Term_Gram (ProTok meta ts)
450 -> Either Error_Term_Gram (ProTok meta ts)
451 -> Either Error_Term_Gram (ProTok meta ts) )
452 op_infix name toks meta = do
453 (_pre, in_, _post) <- protok_lookup name toks
455 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
457 Right $ (protok_fixity p,) $ \ma mb -> do
460 foldM protok_app (protok_term p meta) [a, b]
461 op_prefix, op_postfix
463 -> Tokenizers meta ts
465 -> Either Error_Term_Gram
467 , Either Error_Term_Gram (ProTok meta ts)
468 -> Either Error_Term_Gram (ProTok meta ts) )
469 op_prefix name toks meta = do
470 (pre, _in_, _post) <- protok_lookup name toks
472 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
474 Right $ (protok_fixity p,) $ \ma -> do
476 protok_term p meta `protok_app` a
477 op_postfix name toks meta = do
478 (_pre, _in_, post) <- protok_lookup name toks
480 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
482 Right $ (protok_fixity p,) $ \ma -> do
484 protok_term p meta `protok_app` a
485 g_term_op_postfix :: CF g (Mod TeName)
486 g_term_op_postfix = rule "term_op_postfix" $
488 g_backquote *> g_term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
490 g_term_op_infix :: CF g (Mod TeName)
491 g_term_op_infix = rule "term_op_infix" $
493 between g_backquote g_backquote g_term_mod_idname <+>
494 try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
496 g_term_op_prefix :: CF g (Mod TeName)
497 g_term_op_prefix = rule "term_op_prefix" $
499 g_term_mod_idname <* g_backquote <+>
501 g_backquote :: Gram_Terminal g' => g' Char
502 g_backquote = char '`'
505 :: Inj_Tokens meta ts '[Proxy (->)]
506 => CF g (ProTok meta ts)
509 (try (ProTokTy <$ char '@' <*> g_type) :) $
510 try <$> gs_term_atomsR (Proxy @ts) <>
512 metaG $ term_unError $ tokenizers_get $
514 (_, in_, _) <- protok_lookup mn toks
518 [] `Mod` n -> Right $ \meta ->
519 ProTokTe $ inj_EToken meta $ Token_Term_Var n
520 _ -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
521 Just p -> Right $ protok_term p
522 ) <$> g_term_mod_name
523 , ProTokTe <$> g_term_group
526 :: Inj_Tokens meta ts '[Proxy (->)]
527 => CF g (EToken meta ts)
528 g_term_group = rule "term_group" $ parens g_term
530 :: Inj_Tokens meta ts '[Proxy (->)]
531 => CF g (EToken meta ts)
532 g_term_abst = rule "term_abst" $
537 Token_Term_Abst x ty_x) te xs) <$>) $
538 g_term_abst_args_body
539 (symbol "\\" *> some g_term_abst_decl <* symbol "->")
541 g_term_abst_args_body
542 :: CF g [(TeName, TokType meta)]
543 -> CF g (EToken meta ts)
544 -> CF g ([(TeName, TokType meta)], EToken meta ts)
545 -- g_term_abst_args_body args body = (,) <$> args <*> body
546 g_term_abst_args_body cf_args cf_body =
547 tokenizers_put $ tokenizers_get $
548 (\a b (toks::Tokenizers meta ts) -> (toks, (a, b)))
549 <$> (tokenizers_put $ tokenizers_get $
550 (\args (toks::Tokenizers meta ts) -> (,args)
552 { tokenizers_prefix = del (tokenizers_prefix toks) args
553 , tokenizers_infix = ins (tokenizers_infix toks) args
554 , tokenizers_postfix = del (tokenizers_postfix toks) args
558 del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
559 ins = foldr $ \(n, _) ->
560 Map.insertWith (<>) [] $
563 { protok_term = \meta -> ProTokTe $ inj_EToken meta $ Token_Term_Var n
564 , protok_fixity = infixN5
567 :: Inj_Tokens meta ts '[Proxy (->)]
568 => CF g (EToken meta ts)
569 g_term_let = rule "term_let" $
571 (\name args bound body meta ->
574 (foldr (\(x, ty_x) ->
575 inj_EToken meta . Token_Term_Abst x ty_x) bound args) body)
578 <*> many g_term_abst_decl
585 ( Gram_Term ts meta g
586 , Gram_Term_AtomsR meta ts ts (CF g)
587 ) => Gram_Term ts meta (CF g)
589 ( Gram_Term_AtomsR meta ts ts EBNF
590 , Inj_Token meta ts (->)
591 ) => Gram_Term ts meta EBNF where
592 tokenizers_get (CF (EBNF g)) = CF $ EBNF g
593 tokenizers_put (CF (EBNF g)) = CF $ EBNF g
595 ( Gram_Term_AtomsR meta ts ts RuleDef
596 , Inj_Token meta ts (->)
597 ) => Gram_Term ts meta RuleDef where
598 tokenizers_get (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
599 tokenizers_put (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
601 -- ** Class 'Gram_Term_AtomsR'
602 class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
603 gs_term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
604 instance Gram_Term_AtomsR meta ts '[] g where
605 gs_term_atomsR _rs = []
607 ( Gram_Term_AtomsT meta ts t g
608 , Gram_Term_AtomsR meta ts rs g
609 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
611 gs_term_atomsT (Proxy @t) <>
612 gs_term_atomsR (Proxy @rs)
614 -- ** Class 'Gram_Term_AtomsT'
615 class Gram_Term_AtomsT meta ts t g where
616 gs_term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
617 gs_term_atomsT _t = []
618 -- instance Gram_Term_AtomsT meta ts t RuleDef
622 ( Gram_Term '[Proxy (->), Proxy Integer] () g
626 , ue g_term_operators
630 , void (g_term_abst_decl::CF g (TeName, TokType ()))
632 , void g_term_mod_name
635 , void $ cf_of_Terminal g_term_idname_tail
636 , void $ cf_of_Reg g_term_keywords
637 , void g_term_mod_opname
639 , void $ cf_of_Terminal g_term_opname_ok
640 , void $ cf_of_Reg g_term_keysyms
642 up :: CF g (ProTok () '[Proxy (->), Proxy Integer]) -> CF g ()
644 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()