1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.Symantic.Compiling.Term.Grammar where
10 import Control.Arrow (left)
11 import Control.Monad (void)
12 import Data.Map.Strict (Map)
13 import Data.Proxy (Proxy(..))
14 import Data.Semigroup (Semigroup(..))
15 import Data.Text (Text)
16 import Prelude hiding (mod, not, any)
17 import qualified Data.Char as Char
18 import qualified Data.Function as Fun
19 import qualified Data.Map.Strict as Map
20 import qualified Data.Text as Text
22 import Language.Symantic.Parsing
23 import Language.Symantic.Typing
-- | A module path: a sequence of module-name segments.
26 type Mod_Path = [Mod_Name]
-- | One segment of a module path.
27 newtype Mod_Name = Mod_Name Text
28 deriving (Eq, Ord, Show)
-- | A value qualified by the module path it belongs to.
29 data Mod a = Mod Mod_Path a
30 deriving (Eq, Functor, Ord, Show)
-- NOTE(review): the commented-out alias suggests 'AST_Term' leaves
-- once were 'EToken's directly; they are now 'Token_Term's.
33 -- type AST_Term src ss = BinTree (EToken src ss)
-- | A term AST: a binary tree whose leaves are 'Token_Term's.
34 type AST_Term src ss = BinTree (Token_Term src ss)
36 -- ** Type 'Token_Term'
-- | Surface tokens of the term language; all constructors except
-- 'Token_Term_Type' carry a 'src' source annotation.
-- NOTE(review): original line 43 (between 'Token_Term_Let' and
-- 'Token_Term_Type') is missing from this excerpt — confirm in the
-- full file whether a constructor was elided there.
37 data Token_Term src ss
-- A token injected from the symantics list @ss@.
38 = Token_Term (EToken src ss)
-- Abstraction: bound name, its ascribed type, and the body.
39 | Token_Term_Abst src TeName (AST_Type src) (AST_Term src ss)
-- Application node; the operands live in the surrounding 'BinTree'
-- (the commented-out fields show an earlier explicit representation).
40 | Token_Term_App src -- (AST_Term src ss) (AST_Term src ss)
-- Variable occurrence.
41 | Token_Term_Var src TeName
-- Let binding: bound name, bound term, body.
42 | Token_Term_Let src TeName (AST_Term src ss) (AST_Term src ss)
-- A type used in term position (see the '@' syntax in g_term_atom).
44 | Token_Term_Type (AST_Type src)
-- Structural equality for 'Token_Term': every clause ignores the
-- 'src' annotation (matched with _) and compares only the payload.
-- NOTE(review): original line 48 (the instance head, presumably
-- "instance (...) =>") and the line(s) after 60 (presumably a
-- catch-all "_ == _ = False") are missing from this excerpt.
49 Eq (Token_Term src ss) where
50 Token_Term x == Token_Term y = x == y
51 (==) (Token_Term_Abst _ nx vx bx)
52 (Token_Term_Abst _ ny vy by) =
53 nx == ny && vx == vy && bx == by
54 (==) (Token_Term_App _)
55 (Token_Term_App _) = True
56 (==) (Token_Term_Var _ nx)
57 (Token_Term_Var _ ny) = nx == ny
58 (==) (Token_Term_Let _ nx vx bx)
59 (Token_Term_Let _ ny vy by) =
60 nx == ny && vx == vy && bx == by
62 instance Show_Token ss => Show (Token_Term src ss) where
-- Delegates to the wrapped token's Show for 'Token_Term'; otherwise
-- prints the constructor name followed by its non-src fields at
-- constructor-argument precedence 11. The 'src' field is never shown.
-- NOTE(review): original lines 66, 72 and 76 are missing from this
-- excerpt; given the otherwise-unused precedence parameter @p@, they
-- are presumably "showParen (p >= 10) $" wrappers — confirm in the
-- full file.
63 showsPrec p (Token_Term t) = showsPrec p t
64 showsPrec _p (Token_Term_App _) = showString "Token_Term_App"
65 showsPrec p (Token_Term_Abst _ n v b) =
67 showString "Token_Term_Abst" .
68 showChar ' ' . showsPrec 11 n .
69 showChar ' ' . showsPrec 11 v .
70 showChar ' ' . showsPrec 11 b
71 showsPrec p (Token_Term_Var _ n) =
73 showString "Token_Term_Var" .
74 showChar ' ' . showsPrec 11 n
75 showsPrec p (Token_Term_Let _ n v b) =
77 showString "Token_Term_Let" .
78 showChar ' ' . showsPrec 11 n .
79 showChar ' ' . showsPrec 11 v .
80 showChar ' ' . showsPrec 11 b
86 -- ** Type 'Tokenizer'
-- | A token builder (from a 'src' annotation) paired with its fixity
-- @fixy@ ('Unifix' or 'Infix' at the use sites below).
-- NOTE(review): original line 88 (presumably "= Tokenizer" with the
-- opening brace) and the record's closing lines are missing from
-- this excerpt.
87 data Tokenizer fixy src ss
89 { tokenizer :: src -> Token_Term src ss
90 , tokenizer_fixity :: fixy
93 -- ** Type 'Tokenizers'
-- | Tokenizer tables indexed first by module path, then by term name,
-- split by the syntactic position the token may occupy
-- (prefix / infix / postfix).
-- NOTE(review): original line 95 (presumably "= Tokenizers" with the
-- opening brace), the record's closing line, and lines 99-101 (the
-- head of the standalone-deriving Show instance whose context is
-- shown below) are missing from this excerpt.
94 data Tokenizers src ss
96 { tokenizers_prefix :: Map Mod_Path (Map TeName (Tokenizer Unifix src ss))
97 , tokenizers_infix :: Map Mod_Path (Map TeName (Tokenizer Infix src ss))
98 , tokenizers_postfix :: Map Mod_Path (Map TeName (Tokenizer Unifix src ss))
102 ( Show (Tokenizer Unifix src ss)
103 , Show (Tokenizer Infix src ss)
104 ) => Show (Tokenizers src ss)
105 instance Semigroup (Tokenizers src ss) where
-- Field-wise merge: the outer maps (keyed by module path) are
-- combined with Map.unionWith Map.union, so the inner per-name maps
-- are merged with the left operand winning on duplicate names
-- (Data.Map.union is left-biased).
-- NOTE(review): original lines 106-107 (presumably the clause head
-- "x <> y = Tokenizers") and 112 (presumably
-- "(tokenizers_infix x)") are missing from this excerpt.
108 (Map.unionWith Map.union
109 (tokenizers_prefix x)
110 (tokenizers_prefix y))
111 (Map.unionWith Map.union
113 (tokenizers_infix y))
114 (Map.unionWith Map.union
115 (tokenizers_postfix x)
116 (tokenizers_postfix y))
-- | Identity element: all three tables empty.
117 instance Monoid (Tokenizers src ss) where
118 mempty = Tokenizers Map.empty Map.empty Map.empty
-- | Collect the 'Tokenizers' contributed by every symantic type in
-- @ss@, by delegating to the 'TokenizeR' type-level fold.
121 tokenizers :: forall src ss. Tokenize src ss => Tokenizers src ss
122 tokenizers = tokenizeR (Proxy @ss)
124 -- | Lookup the given 'Mod' 'TeName' into the given 'Tokenizers',
125 -- returning for prefix, infix and postfix positions, when there is a match.
-- NOTE(review): original lines 126-130 (the signature head), 134,
-- 138-141 and 144 are missing from this excerpt, so the scrutinee of
-- the case whose alternatives appear below cannot be confirmed here.
131 -> ( Maybe (Tokenizer Unifix src ss)
132 , Maybe (Tokenizer Infix src ss)
133 , Maybe (Tokenizer Unifix src ss)
135 tokenizer_lookup mn@(mod `Mod` n) (Tokenizers pres ins posts) = do
-- Prefix/postfix positions: two-level lookup, first by module path,
-- then by name.
136 let pre = Map.lookup mod pres >>= Map.lookup n
137 let post = Map.lookup mod posts >>= Map.lookup n
-- Infix position: some case (elided above) yields a built-in
-- application tokenizer, left-associative at precedence 9; the
-- fall-through does a plain lookup in the infix table.
142 { tokenizer = Token_Term_App @src @ss
143 , tokenizer_fixity = Infix (Just AssocL) 9
145 _ -> Map.lookup mod ins >>= Map.lookup n
148 -- ** Class 'TokenizeR'
-- | Type-level fold over the list @rs@ of symantics, accumulating
-- their 'Tokenizers' monoidally.
149 class TokenizeR src (ss::[*]) (rs::[*]) where
150 tokenizeR :: Proxy rs -> Tokenizers src ss
-- Base case: the empty list contributes no tokenizers.
151 instance TokenizeR src ss '[] where
152 tokenizeR _rs = mempty
-- Inductive case: combine the head symantic's tokenizers with the
-- fold over the tail.
-- NOTE(review): original lines 153-154 (the instance head, presumably
-- "instance ( TokenizeT src ss s") and 157 (presumably the clause
-- head "tokenizeR _rs = tokenizeT (Proxy @s)"-style line) are missing
-- from this excerpt.
155 , TokenizeR src ss rs
156 ) => TokenizeR src ss (Proxy s ': rs) where
158 tokenizeR (Proxy @rs) <>
161 -- ** Class 'TokenizeT'
-- | Per-symantic hook: each type @s@ may contribute tokenizers; the
-- default method contributes none.
162 class TokenizeT src ss s where
163 tokenizeT :: Proxy s -> Tokenizers src ss
164 tokenizeT _t = mempty
-- | Build a one-module table: the given (name, tokenizer) entries
-- keyed under the given module path.
-- NOTE(review): original lines 165-167 (the start of tokenizeTMod's
-- type signature) are missing from this excerpt.
168 -> [(TeName, Tokenizer fix src ss)]
169 -> Map Mod_Path (Map TeName (Tokenizer fix src ss))
170 tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl
-- | Build a single table entry for name @n@ with the given fixity,
-- wrapping the token @tok@ into a 'Token_Term' at parse time via
-- 'inj_EToken'.
-- NOTE(review): original lines 171-173 (the start of token's type
-- signature) and 179-180 (the record's remaining fields / closing)
-- are missing from this excerpt.
174 => Text -> fixity -> TokenT ss (Proxy s)
175 -> (TeName, Tokenizer fixity src ss)
176 token n tokenizer_fixity tok =
177 (TeName n,) Tokenizer
178 { tokenizer = \src -> Token_Term $ inj_EToken src tok
181 -- * Class 'Gram_Name'
-- | Grammar of module paths and term names: alphabetic identifiers
-- and symbolic operator names, with default rule definitions.
-- NOTE(review): this excerpt elides many original lines (gaps in the
-- 181-271 numbering), including the class head and context and parts
-- of several rule bodies — read alongside the full file.
194 ) => Gram_Name g where
-- A dotted module path built from module names.
195 g_mod_path :: CF g Mod_Path
196 g_mod_path = rule "mod_path" $
198 (pure <$> g_mod_name)
-- A module name: an identifier starting with an uppercase letter.
201 g_mod_name :: CF g Mod_Name
202 g_mod_name = rule "mod_name" $
203 (Mod_Name . Text.pack <$>) $
207 <*. (any `but` g_term_idname_tail)
209 identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
210 headG = unicat $ Unicat Char.UppercaseLetter
-- A possibly module-qualified term name: an identifier, or an
-- operator name in parentheses.
212 g_term_mod_name :: CF g (Mod TeName)
213 g_term_mod_name = rule "term_mod_name" $
215 g_term_mod_idname <+>
216 parens g_term_mod_opname
217 g_term_name :: CF g TeName
218 g_term_name = rule "term_name" $
-- Identifier optionally prefixed by a module path and a dot.
223 g_term_mod_idname :: CF g (Mod TeName)
224 g_term_mod_idname = rule "term_mod_idname" $
226 <$> option [] (try $ g_mod_path <* char '.')
-- A term identifier: a letter-initial word.
228 g_term_idname :: CF g TeName
229 g_term_idname = rule "term_idname" $
230 (TeName . Text.pack <$>) $
234 <*. (any `but` g_term_idname_tail)
236 identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
237 headG = unicat $ Unicat_Letter
-- Characters permitted after an identifier's first letter.
238 g_term_idname_tail :: Terminal g Char
239 g_term_idname_tail = rule "term_idname_tail" $
240 unicat Unicat_Letter <+>
-- Reserved identifiers.
242 g_term_keywords :: Reg rl g String
243 g_term_keywords = rule "term_keywords" $
244 choice $ string <$> ["in", "let"]
246 g_term_mod_opname :: CF g (Mod TeName)
247 g_term_mod_opname = rule "term_mod_opname" $
249 <$> option [] (try $ g_mod_path <* char '.')
-- A symbolic operator name.
251 g_term_opname :: CF g TeName
252 g_term_opname = rule "term_opname" $
253 (TeName . Text.pack <$>) $
257 <*. (any `but` g_term_opname_ok)
259 symG = some $ cf_of_Terminal g_term_opname_ok
-- Characters permitted in operator names; koG lists the excluded
-- delimiter characters.
260 g_term_opname_ok :: Terminal g Char
261 g_term_opname_ok = rule "term_opname_ok" $
268 koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
-- Reserved symbolic names.
269 g_term_keysyms :: Reg rl g String
270 g_term_keysyms = rule "term_keysyms" $
271 choice $ string <$> ["\\", "->", "=", "@"]
-- Interpreters of the name grammar: the CF lifting, and the EBNF
-- pretty-printers relying on the class's default definitions.
273 deriving instance Gram_Name g => Gram_Name (CF g)
274 instance Gram_Name EBNF
275 instance Gram_Name RuleDef
277 -- * Class 'Gram_Term_Type'
-- | Grammar of a term-abstraction declaration yielding a name paired
-- with its ascribed type.
-- NOTE(review): original lines 278-288 (class head and context), 290
-- (start of the method signature) and 293-297 (the rule body) are
-- missing from this excerpt.
289 ) => Gram_Term_Type src g where
291 :: CF g (TeName, AST_Type src)
292 g_term_abst_decl = rule "term_abst_decl" $
298 deriving instance Gram_Term_Type src g => Gram_Term_Type src (CF g)
299 instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src EBNF
300 instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src RuleDef
302 -- * Class 'Gram_Error'
-- | Grammars able to turn a parse of @Either Error_Term_Gram a@ into
-- a parse of @a@ (surfacing the error through the grammar machinery).
303 class Gram_Error g where
304 errorG :: CF g (Either Error_Term_Gram a) -> CF g a
305 deriving instance Gram_Error g => Gram_Error (CF g)
-- For the EBNF / RuleDef pretty-printing interpreters, errorG merely
-- rewraps the underlying EBNF value: only the phantom result type
-- changes, the rendered grammar is untouched.
306 instance Gram_Error EBNF where
307 errorG (CF (EBNF g)) = CF $ EBNF g
308 instance Gram_Error RuleDef where
309 errorG (CF (RuleDef (EBNF g))) =
310 CF $ RuleDef $ EBNF g
312 -- ** Type 'Error_Term_Gram'
-- | Errors the term grammar can emit (via 'errorG').
-- NOTE(review): original line 313 (presumably
-- "data Error_Term_Gram") and the trailing deriving clause are
-- missing from this excerpt.
314 = Error_Term_Gram_Fixity Error_Fixity
315 | Error_Term_Gram_Term_incomplete
316 | Error_Term_Gram_Type_applied_to_nothing
317 | Error_Term_Gram_not_applicable
318 | Error_Term_Gram_application
319 | Error_Term_Gram_application_mismatch
322 -- * Class 'Gram_Term'
-- | Grammar of terms — operator expressions, atoms, grouping,
-- abstraction and let — parameterized by the symantics list @ss@.
-- tokenizers_get/tokenizers_put expose and update the 'Tokenizers'
-- threaded through the grammar, so locally bound names can shadow
-- the global tables (see g_term_abst_args_body).
-- NOTE(review): this excerpt elides many original lines (gaps in the
-- 322-495 numbering), including the class head/context and parts of
-- most rule bodies — read alongside the full file.
333 , Gram_Term_AtomsR src ss ss g
335 , Gram_Term_Type src g
338 ) => Gram_Term ss src g where
-- Access / update the tokenizer state carried by the grammar.
339 tokenizers_get :: CF g (Tokenizers src ss -> a) -> CF g a
340 tokenizers_put :: CF g (Tokenizers src ss, a) -> CF g a
343 => CF g (AST_Term src ss)
344 g_term = rule "term" $
-- Operator expressions: built with the 'operators' combinator from
-- atoms plus the three operator tables below; fixity-resolution
-- failures surface as Error_Term_Gram_Fixity.
352 => CF g (AST_Term src ss)
353 g_term_operators = rule "term_operators" $
355 left Error_Term_Gram_Fixity <$>
358 g_ops :: CF g (Either Error_Fixity (AST_Term src ss))
359 g_ops = operators g_term_atom g_prefix g_infix g_postfix
360 g_prefix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
361 g_infix :: CF g (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
362 g_postfix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
363 g_prefix = errorG $ metaG $ tokenizers_get $ op_prefix <$> g_prefix_op
364 g_infix = errorG $ metaG $ tokenizers_get $ op_infix <$> g_infix_op
365 g_postfix = errorG $ metaG $ tokenizers_get $ op_postfix <$> g_postfix_op
-- Resolve an infix operator name against the current tokenizers; on
-- success return its fixity and a builder applying the operator
-- token to both operands.
370 -> Either Error_Term_Gram
371 (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
372 op_infix name toks src = do
373 let (_pre, in_, _post) = tokenizer_lookup name toks
375 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
377 Right $ (tokenizer_fixity p,) $ \a b ->
378 (BinTree0 (tokenizer p src) `BinTree1` a) `BinTree1` b
-- Likewise for the two unary positions.
379 op_prefix, op_postfix
383 -> Either Error_Term_Gram
385 , AST_Term src ss -> AST_Term src ss )
386 op_prefix name toks src = do
387 let (pre, _in_, _post) = tokenizer_lookup name toks
389 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
391 Right $ (tokenizer_fixity p,) $ \a ->
392 BinTree0 (tokenizer p src) `BinTree1` a
393 op_postfix name toks src = do
394 let (_pre, _in_, post) = tokenizer_lookup name toks
396 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
398 Right $ (tokenizer_fixity p,) $ \a ->
399 BinTree0 (tokenizer p src) `BinTree1` a
-- Surface syntax of operator occurrences: backquoted identifiers in
-- the various positions, bare opnames otherwise.
400 g_postfix_op :: CF g (Mod TeName)
401 g_postfix_op = rule "term_op_postfix" $
403 g_backquote *> g_term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
405 g_infix_op :: CF g (Mod TeName)
406 g_infix_op = rule "term_op_infix" $
408 between g_backquote g_backquote g_term_mod_idname <+>
409 try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
411 g_prefix_op :: CF g (Mod TeName)
412 g_prefix_op = rule "term_op_prefix" $
414 g_term_mod_idname <* g_backquote <+>
416 g_backquote :: Gram_Terminal g' => g' Char
417 g_backquote = char '`'
-- Atoms: a '@'-prefixed type argument, the atoms contributed by each
-- symantic in ss, and (qualified) names resolved through the current
-- tokenizers; an unresolved unqualified name parses as a variable.
421 => CF g (AST_Term src ss)
422 g_term_atom = rule "term_atom" $
426 (\typ src -> BinTree0 $ inj_EToken src $ Token_Term_Type typ)
427 <$ char '@' <*> g_type) :) $
429 (try <$> gs_term_atomsR (Proxy @ss)) <>
431 errorG $ metaG $ tokenizers_get $
433 let (_, in_, _) = tokenizer_lookup mn toks
435 Just p -> Right $ BinTree0 $ tokenizer p src
438 [] `Mod` n -> Right $ BinTree0 $ Token_Term_Var src n
439 _ -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
440 ) <$> g_term_mod_name
-- Parenthesized term.
445 => CF g (AST_Term src ss)
446 g_term_group = rule "term_group" $ parens g_term
-- Abstraction: "\" declarations "->" body, folding the declared
-- (name, type) pairs into nested Token_Term_Abst nodes.
449 => CF g (AST_Term src ss)
450 g_term_abst = rule "term_abst" $
454 BinTree0 . Token_Term_Abst src x ty_x) te xs) <$>) $
455 g_term_abst_args_body
456 (symbol "\\" *> some g_term_abst_decl <* symbol "->")
-- Parse args, then the body under temporarily modified tokenizers:
-- each bound name is deleted from the prefix/postfix tables and
-- inserted into the infix table as a plain variable token at
-- infixN5; the outermost get/put pair then restores the original
-- tokenizers once the body has been parsed.
458 g_term_abst_args_body
459 :: CF g [(TeName, AST_Type src)]
460 -> CF g (AST_Term src ss)
461 -> CF g ([(TeName, AST_Type src)], AST_Term src ss)
462 -- g_term_abst_args_body args body = (,) <$> args <*> body
463 g_term_abst_args_body cf_args cf_body =
464 tokenizers_put $ tokenizers_get $
465 (\a b (toks::Tokenizers src ss) -> (toks, (a, b)))
466 <$> (tokenizers_put $ tokenizers_get $
467 (\args (toks::Tokenizers src ss) -> (,args)
469 { tokenizers_prefix = del (tokenizers_prefix toks) args
470 , tokenizers_infix = ins (tokenizers_infix toks) args
471 , tokenizers_postfix = del (tokenizers_postfix toks) args
-- del/ins act on the table entry at the root module path [].
475 del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
476 ins = foldr $ \(n, _) ->
477 Map.insertWith (<>) [] $
480 { tokenizer = \src -> Token_Term_Var src n
481 , tokenizer_fixity = infixN5
-- Let binding ("let" / "in" are reserved above): extra argument
-- declarations are desugared into abstractions around the bound term.
485 => CF g (AST_Term src ss)
486 g_term_let = rule "term_let" $
488 (\name args bound body src ->
490 Token_Term_Let src name
491 (foldr (\(x, ty_x) ->
492 BinTree0 . Token_Term_Abst src x ty_x) bound args) body)
495 <*> many g_term_abst_decl
-- Interpreters of the term grammar.
-- NOTE(review): original lines 505, 507, 512 and 514 (instance-head
-- fragments) are missing from this excerpt.
503 , Gram_Term_AtomsR src ss ss (CF g)
504 ) => Gram_Term ss src (CF g)
-- For the EBNF / RuleDef pretty-printers, get/put merely rewrap the
-- underlying grammar value: no tokenizer state exists when rendering.
506 ( Gram_Term_AtomsR src ss ss EBNF
508 , Inj_Source (Text_of_Source src) src
509 ) => Gram_Term ss src EBNF where
510 tokenizers_get (CF (EBNF g)) = CF $ EBNF g
511 tokenizers_put (CF (EBNF g)) = CF $ EBNF g
513 ( Gram_Term_AtomsR src ss ss RuleDef
515 , Inj_Source (Text_of_Source src) src
516 ) => Gram_Term ss src RuleDef where
517 tokenizers_get (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
518 tokenizers_put (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
520 -- ** Class 'Gram_Term_AtomsR'
-- | Type-level fold over @rs@ collecting the atom grammars
-- contributed by each symantic (mirrors 'TokenizeR').
521 class Gram_Term_AtomsR src (ss::[*]) (rs::[*]) g where
522 gs_term_atomsR :: Proxy rs -> [CF g (AST_Term src ss)]
-- Base case: no symantics, no atoms.
523 instance Gram_Term_AtomsR src ss '[] g where
524 gs_term_atomsR _rs = []
-- Inductive case: the head's atoms followed by the tail's.
-- NOTE(review): original lines 525 (instance head) and 529 (the
-- method clause head) are missing from this excerpt.
526 ( Gram_Term_AtomsT src ss t g
527 , Gram_Term_AtomsR src ss rs g
528 ) => Gram_Term_AtomsR src ss (t ': rs) g where
530 gs_term_atomsT (Proxy @t) <>
531 gs_term_atomsR (Proxy @rs)
533 -- ** Class 'Gram_Term_AtomsT'
-- | Per-symantic hook contributing atom grammars; the default
-- contributes none.
534 class Gram_Term_AtomsT src ss t g where
535 gs_term_atomsT :: Proxy t -> [CF g (AST_Term src ss)]
536 gs_term_atomsT _t = []
537 -- instance Gram_Term_AtomsT src ss t RuleDef
-- Enumeration of the grammar's rules, instantiated at
-- ss = '[Proxy (->), Proxy Integer] and src = () — presumably a
-- rule listing used to render or exercise the whole grammar; its
-- binding head is elided from this excerpt.
-- NOTE(review): numerous original lines (gaps in the 541-561
-- numbering) are missing here — confirm against the full file.
541 ( Gram_Term '[Proxy (->), Proxy Integer] () g
545 , voiD g_term_operators
549 , void (g_term_abst_decl::CF g (TeName, AST_Type ()))
551 , void g_term_mod_name
554 , void $ cf_of_Terminal g_term_idname_tail
555 , void $ cf_of_Reg g_term_keywords
556 , void g_term_mod_opname
558 , void $ cf_of_Terminal g_term_opname_ok
559 , void $ cf_of_Reg g_term_keysyms
561 voiD :: CF g (AST_Term () '[Proxy (->), Proxy Integer]) -> CF g ()