{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE UndecidableInstances #-}
-- NOTE: the extensions below are also needed by this module; presumably they
-- are otherwise enabled package-wide via default-extensions.
{-# LANGUAGE DataKinds, FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
{-# LANGUAGE NamedFieldPuns, OverloadedStrings, ScopedTypeVariables, StandaloneDeriving #-}
{-# LANGUAGE TupleSections, TypeApplications, TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.Symantic.Compiling.Term.Grammar where

import Control.Arrow (left)
import Control.Monad (void)
import Data.Map.Strict (Map)
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.Text (Text)
import Prelude hiding (mod, not, any)
import qualified Data.Char as Char
import qualified Data.Function as Fun
import qualified Data.Map.Strict as Map
import qualified Data.Text as Text

import Language.Symantic.Parsing
import Language.Symantic.Typing

-- * Type 'Mod'
type Mod_Path = [Mod_Name]
newtype Mod_Name = Mod_Name Text
  deriving (Eq, Ord, Show)
data Mod a = Mod Mod_Path a
  deriving (Eq, Functor, Ord, Show)
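-- Illustrative example (not from the original sources): a 'Mod' pairs a value
-- with the module path qualifying it, so the qualified name Foo.Bar.x is
--
-- > Mod [Mod_Name "Foo", Mod_Name "Bar"] (TeName "x") :: Mod TeName
--
-- and an unqualified x is Mod [] (TeName "x").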

-- * Type 'AST_Term'
-- type AST_Term src ss = BinTree (EToken src ss)
type AST_Term src ss = BinTree (Token_Term src ss)

-- ** Type 'Token_Term'
data Token_Term src ss
  = Token_Term (EToken src ss)
  | Token_Term_Abst src TeName (AST_Type src) (AST_Term src ss)
  | Token_Term_App src -- (AST_Term src ss) (AST_Term src ss)
  | Token_Term_Var src TeName
  | Token_Term_Let src TeName (AST_Term src ss) (AST_Term src ss)
  {-
  | Token_Term_Type (AST_Type src)
  -}

instance
  Eq_Token ss =>
  Eq (Token_Term src ss) where
  Token_Term x == Token_Term y = x == y
  (==) (Token_Term_Abst _ nx vx bx)
       (Token_Term_Abst _ ny vy by) =
    nx == ny && vx == vy && bx == by
  (==) (Token_Term_App _)
       (Token_Term_App _) = True
  (==) (Token_Term_Var _ nx)
       (Token_Term_Var _ ny) = nx == ny
  (==) (Token_Term_Let _ nx vx bx)
       (Token_Term_Let _ ny vy by) =
    nx == ny && vx == vy && bx == by
  (==) _ _ = False
instance Show_Token ss => Show (Token_Term src ss) where
  showsPrec p (Token_Term t) = showsPrec p t
  showsPrec _p (Token_Term_App _) = showString "Token_Term_App"
  showsPrec p (Token_Term_Abst _ n v b) =
    showParen (p > 10) $
    showString "Token_Term_Abst" .
    showChar ' ' . showsPrec 11 n .
    showChar ' ' . showsPrec 11 v .
    showChar ' ' . showsPrec 11 b
  showsPrec p (Token_Term_Var _ n) =
    showParen (p > 10) $
    showString "Token_Term_Var" .
    showChar ' ' . showsPrec 11 n
  showsPrec p (Token_Term_Let _ n v b) =
    showParen (p > 10) $
    showString "Token_Term_Let" .
    showChar ' ' . showsPrec 11 n .
    showChar ' ' . showsPrec 11 v .
    showChar ' ' . showsPrec 11 b
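-- Illustrative sketch (not from the original sources): an application f x is
-- encoded as a 'BinTree' whose left spine starts with 'Token_Term_App', as
-- built by the infix whitespace operator further below:
--
-- >  (BinTree0 (Token_Term_App src)
-- >   `BinTree1` BinTree0 (Token_Term_Var src (TeName "f")))
-- >   `BinTree1` BinTree0 (Token_Term_Var src (TeName "x"))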

-- * Class 'Tokenize'
type Tokenize src ss
  = TokenizeR src ss ss

-- ** Type 'Tokenizer'
data Tokenizer fixy src ss
  = Tokenizer
  { tokenizer        :: src -> Token_Term src ss
  , tokenizer_fixity :: fixy
  }

-- ** Type 'Tokenizers'
data Tokenizers src ss
  = Tokenizers
  { tokenizers_prefix  :: Map Mod_Path (Map TeName (Tokenizer Unifix src ss))
  , tokenizers_infix   :: Map Mod_Path (Map TeName (Tokenizer Infix  src ss))
  , tokenizers_postfix :: Map Mod_Path (Map TeName (Tokenizer Unifix src ss))
  }

deriving instance
  ( Show (Tokenizer Unifix src ss)
  , Show (Tokenizer Infix src ss)
  ) => Show (Tokenizers src ss)
instance Semigroup (Tokenizers src ss) where
  x <> y =
    Tokenizers
      (Map.unionWith Map.union
        (tokenizers_prefix x)
        (tokenizers_prefix y))
      (Map.unionWith Map.union
        (tokenizers_infix x)
        (tokenizers_infix y))
      (Map.unionWith Map.union
        (tokenizers_postfix x)
        (tokenizers_postfix y))
instance Monoid (Tokenizers src ss) where
  mempty  = Tokenizers Map.empty Map.empty Map.empty
  mappend = (<>)

tokenizers :: forall src ss. Tokenize src ss => Tokenizers src ss
tokenizers = tokenizeR (Proxy @ss)

-- | Look up the given 'Mod' 'TeName' in the given 'Tokenizers',
-- returning the matching 'Tokenizer's (if any)
-- for the prefix, infix and postfix positions.
tokenizer_lookup
 :: forall src ss.
    Inj_Token ss (->)
 => Mod TeName
 -> Tokenizers src ss
 -> ( Maybe (Tokenizer Unifix src ss)
    , Maybe (Tokenizer Infix  src ss)
    , Maybe (Tokenizer Unifix src ss)
    )
tokenizer_lookup mn@(mod `Mod` n) (Tokenizers pres ins posts) = do
  let pre  = Map.lookup mod pres  >>= Map.lookup n
  let post = Map.lookup mod posts >>= Map.lookup n
  let in_ =
        case mn of
          -- A bare space names the built-in application,
          -- which is infix, left-associative, at precedence 9.
          [] `Mod` " " -> Just
            Tokenizer
              { tokenizer        = Token_Term_App @src @ss
              , tokenizer_fixity = Infix (Just AssocL) 9
              }
          _ -> Map.lookup mod ins >>= Map.lookup n
  (pre, in_, post)
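-- Illustrative usage (not from the original sources): looking up the bare
-- whitespace name always yields the built-in application in infix position,
-- whatever the given 'Tokenizers' contain:
--
-- > let (_pre, in_, _post) = tokenizer_lookup (Mod [] " ") toks
-- > -- in_ is Just a 'Tokenizer' producing 'Token_Term_App',
-- > -- with fixity Infix (Just AssocL) 9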

-- ** Class 'TokenizeR'
class TokenizeR src (ss::[*]) (rs::[*]) where
  tokenizeR :: Proxy rs -> Tokenizers src ss
instance TokenizeR src ss '[] where
  tokenizeR _rs = mempty
instance
  ( TokenizeT src ss s
  , TokenizeR src ss rs
  ) => TokenizeR src ss (Proxy s ': rs) where
  tokenizeR _ =
    tokenizeR (Proxy @rs) <>
    tokenizeT (Proxy @s)

-- ** Class 'TokenizeT'
class TokenizeT src ss s where
  tokenizeT :: Proxy s -> Tokenizers src ss
  tokenizeT _t = mempty

tokenizeTMod
 :: Mod_Path
 -> [(TeName, Tokenizer fix src ss)]
 -> Map Mod_Path (Map TeName (Tokenizer fix src ss))
tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl

token
 :: Inj_Token ss s
 => Text -> fixity -> TokenT ss (Proxy s)
 -> (TeName, Tokenizer fixity src ss)
token n tokenizer_fixity tok =
  (TeName n,) Tokenizer
    { tokenizer = \src -> Token_Term $ inj_EToken src tok
    , tokenizer_fixity }
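-- A minimal sketch (assumed names, not from the original sources): a
-- 'TokenizeT' instance typically assembles its 'Tokenizers' from
-- 'tokenizeTMod' and 'token'. For a hypothetical symantic Plus with a token
-- Token_Term_Plus injectable into ss:
--
-- > instance Inj_Token ss Plus => TokenizeT src ss (Proxy Plus) where
-- >   tokenizeT _t = mempty
-- >     { tokenizers_infix = tokenizeTMod []
-- >         [ token "+" (Infix (Just AssocL) 6) Token_Term_Plus ]
-- >     }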

-- * Class 'Gram_Name'
class
  ( Alt g
  , Alter g
  , App g
  , Try g
  , Gram_CF g
  , Gram_Op g
  , Gram_Lexer g
  , Gram_RegL g
  , Gram_Rule g
  , Gram_Terminal g
  ) => Gram_Name g where
  g_mod_path :: CF g Mod_Path
  g_mod_path = rule "mod_path" $
    infixrG
      (pure <$> g_mod_name)
      (op <$ char '.')
    where op = (<>)
  g_mod_name :: CF g Mod_Name
  g_mod_name = rule "mod_name" $
    (Mod_Name . Text.pack <$>) $
    (identG `minus`) $
    Fun.const
      <$> g_term_keywords
      <*. (any `but` g_term_idname_tail)
    where
      identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
      headG = unicat $ Unicat Char.UppercaseLetter

  g_term_mod_name :: CF g (Mod TeName)
  g_term_mod_name = rule "term_mod_name" $
    lexeme $
      g_term_mod_idname <+>
      parens g_term_mod_opname
  g_term_name :: CF g TeName
  g_term_name = rule "term_name" $
    lexeme $
      g_term_idname <+>
      parens g_term_opname

  g_term_mod_idname :: CF g (Mod TeName)
  g_term_mod_idname = rule "term_mod_idname" $
    Mod
      <$> option [] (try $ g_mod_path <* char '.')
      <*> g_term_idname
  g_term_idname :: CF g TeName
  g_term_idname = rule "term_idname" $
    (TeName . Text.pack <$>) $
    (identG `minus`) $
    Fun.const
      <$> g_term_keywords
      <*. (any `but` g_term_idname_tail)
    where
      identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
      headG = unicat Unicat_Letter
  g_term_idname_tail :: Terminal g Char
  g_term_idname_tail = rule "term_idname_tail" $
    unicat Unicat_Letter <+>
    unicat Unicat_Number
  g_term_keywords :: Reg rl g String
  g_term_keywords = rule "term_keywords" $
    choice $ string <$> ["in", "let"]

  g_term_mod_opname :: CF g (Mod TeName)
  g_term_mod_opname = rule "term_mod_opname" $
    Mod
      <$> option [] (try $ g_mod_path <* char '.')
      <*> g_term_opname
  g_term_opname :: CF g TeName
  g_term_opname = rule "term_opname" $
    (TeName . Text.pack <$>) $
    (symG `minus`) $
    Fun.const
      <$> g_term_keysyms
      <*. (any `but` g_term_opname_ok)
    where
      symG = some $ cf_of_Terminal g_term_opname_ok
  g_term_opname_ok :: Terminal g Char
  g_term_opname_ok = rule "term_opname_ok" $
    choice (unicat <$>
      [ Unicat_Symbol
      , Unicat_Punctuation
      , Unicat_Mark
      ]) `but` koG
    where
      koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
  g_term_keysyms :: Reg rl g String
  g_term_keysyms = rule "term_keysyms" $
    choice $ string <$> ["\\", "->", "=", "@"]

deriving instance Gram_Name g => Gram_Name (CF g)
instance Gram_Name EBNF
instance Gram_Name RuleDef
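-- Illustrative examples (not from the original sources) of names accepted by
-- the grammar above: x, x1 and (+) are unqualified term names; Foo.Bar.x and
-- Foo.Bar.(+) are module-qualified ones. "let" and "in" are rejected as
-- keywords; "\\", "->", "=" and "@" are rejected as key symbols.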

-- * Class 'Gram_Term_Type'
class
  ( Alt g
  , Alter g
  , App g
  , Gram_CF g
  , Gram_Lexer g
  , Gram_Meta src g
  , Gram_Rule g
  , Gram_Terminal g
  , Gram_Name g
  , Gram_Type src g
  ) => Gram_Term_Type src g where
  g_term_abst_decl
   :: CF g (TeName, AST_Type src)
  g_term_abst_decl = rule "term_abst_decl" $
    parens $ (,)
      <$> g_term_name
      <*  symbol ":"
      <*> g_type

deriving instance Gram_Term_Type src g => Gram_Term_Type src (CF g)
instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src EBNF
instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src RuleDef
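-- Illustrative note (not from the original sources): 'g_term_abst_decl'
-- parses a parenthesized, type-annotated binder such as "(x : Int)",
-- yielding a pair of TeName "x" and the 'AST_Type' parsed for Int.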

-- * Class 'Gram_Error'
class Gram_Error g where
  errorG :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
  errorG (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
  errorG (CF (RuleDef (EBNF g))) =
    CF $ RuleDef $ EBNF g

-- ** Type 'Error_Term_Gram'
data Error_Term_Gram
  = Error_Term_Gram_Fixity Error_Fixity
  | Error_Term_Gram_Term_incomplete
  | Error_Term_Gram_Type_applied_to_nothing
  | Error_Term_Gram_not_applicable
  | Error_Term_Gram_application
  | Error_Term_Gram_application_mismatch
  deriving (Eq, Show)

-- * Class 'Gram_Term'
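-- Illustrative examples (not from the original sources) of the concrete
-- syntax parsed by 'g_term' below, provided the corresponding tokens and
-- operators are registered in the 'Tokenizers' in scope:
--
-- >  \(x:Int) -> succ x
-- >  let f (x:Int) = succ x in f 42
-- >  x + y            -- infix operator
-- >  f `compose` g    -- backquoted infix identifier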
class
  ( Alt g
  , Alter g
  , App g
  , Gram_CF g
  , Gram_Lexer g
  , Gram_Meta src g
  , Gram_Rule g
  , Gram_Terminal g
  , Gram_Error g
  , Gram_Term_AtomsR src ss ss g
  , Gram_Name g
  , Gram_Term_Type src g
  , Gram_Type src g
  , Inj_Token ss (->)
  ) => Gram_Term ss src g where
  tokenizers_get :: CF g (Tokenizers src ss -> a) -> CF g a
  tokenizers_put :: CF g (Tokenizers src ss, a) -> CF g a
  g_term
   :: Inj_Token ss (->)
   => CF g (AST_Term src ss)
  g_term = rule "term" $
    choice
      [ try g_term_abst
      , g_term_operators
      , g_term_let
      ]
  g_term_operators
   :: Inj_Token ss (->)
   => CF g (AST_Term src ss)
  g_term_operators = rule "term_operators" $
    errorG $
    left Error_Term_Gram_Fixity <$>
    g_ops
    where
      g_ops :: CF g (Either Error_Fixity (AST_Term src ss))
      g_ops = operators g_term_atom g_prefix g_infix g_postfix
      g_prefix  :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
      g_infix   :: CF g (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
      g_postfix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
      g_prefix  = errorG $ metaG $ tokenizers_get $ op_prefix  <$> g_prefix_op
      g_infix   = errorG $ metaG $ tokenizers_get $ op_infix   <$> g_infix_op
      g_postfix = errorG $ metaG $ tokenizers_get $ op_postfix <$> g_postfix_op
      op_infix
       :: Mod TeName
       -> Tokenizers src ss
       -> src
       -> Either Error_Term_Gram
                 (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
      op_infix name toks src = do
        let (_pre, in_, _post) = tokenizer_lookup name toks
        case in_ of
          Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
          Just p ->
            Right $ (tokenizer_fixity p,) $ \a b ->
              (BinTree0 (tokenizer p src) `BinTree1` a) `BinTree1` b
      op_prefix, op_postfix
       :: Mod TeName
       -> Tokenizers src ss
       -> src
       -> Either Error_Term_Gram
                 ( Unifix
                 , AST_Term src ss -> AST_Term src ss )
      op_prefix name toks src = do
        let (pre, _in_, _post) = tokenizer_lookup name toks
        case pre of
          Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
          Just p ->
            Right $ (tokenizer_fixity p,) $ \a ->
              BinTree0 (tokenizer p src) `BinTree1` a
      op_postfix name toks src = do
        let (_pre, _in_, post) = tokenizer_lookup name toks
        case post of
          Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
          Just p ->
            Right $ (tokenizer_fixity p,) $ \a ->
              BinTree0 (tokenizer p src) `BinTree1` a
      g_postfix_op :: CF g (Mod TeName)
      g_postfix_op = rule "term_op_postfix" $
        lexeme $
          g_backquote *> g_term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
          g_term_mod_opname
      g_infix_op :: CF g (Mod TeName)
      g_infix_op = rule "term_op_infix" $
        lexeme $
          between g_backquote g_backquote g_term_mod_idname <+>
          try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
          pure (Mod [] " ")
      g_prefix_op :: CF g (Mod TeName)
      g_prefix_op = rule "term_op_prefix" $
        lexeme $
          g_term_mod_idname <* g_backquote <+>
          g_term_mod_opname
      g_backquote :: Gram_Terminal g' => g' Char
      g_backquote = char '`'

  g_term_atom
   :: Inj_Token ss (->)
   => CF g (AST_Term src ss)
  g_term_atom = rule "term_atom" $
    choice $
    {-(try (
      metaG $
      (\typ src -> BinTree0 $ inj_EToken src $ Token_Term_Type typ)
       <$ char '@' <*> g_type) :) $
    -}
    (try <$> gs_term_atomsR (Proxy @ss)) <>
    [ try $
        errorG $ metaG $ tokenizers_get $
        (\mn toks src -> do
          let (_, in_, _) = tokenizer_lookup mn toks
          case in_ of
            Just p -> Right $ BinTree0 $ tokenizer p src
            Nothing ->
              case mn of
                [] `Mod` n -> Right $ BinTree0 $ Token_Term_Var src n
                _ -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
        ) <$> g_term_mod_name
    , g_term_group
    ]
  g_term_group
   :: Inj_Token ss (->)
   => CF g (AST_Term src ss)
  g_term_group = rule "term_group" $ parens g_term
  g_term_abst
   :: Inj_Token ss (->)
   => CF g (AST_Term src ss)
  g_term_abst = rule "term_abst" $
    metaG $
    ((\(xs, te) src ->
      foldr (\(x, ty_x) ->
        BinTree0 . Token_Term_Abst src x ty_x) te xs) <$>) $
    g_term_abst_args_body
      (symbol "\\" *> some g_term_abst_decl <* symbol "->")
      g_term
  g_term_abst_args_body
   :: CF g [(TeName, AST_Type src)]
   -> CF g (AST_Term src ss)
   -> CF g ([(TeName, AST_Type src)], AST_Term src ss)
  -- g_term_abst_args_body args body = (,) <$> args <*> body
  g_term_abst_args_body cf_args cf_body =
    tokenizers_put $ tokenizers_get $
    (\a b (toks::Tokenizers src ss) -> (toks, (a, b)))
     <$> (tokenizers_put $ tokenizers_get $
       (\args (toks::Tokenizers src ss) -> (,args)
         Tokenizers
          { tokenizers_prefix  = del (tokenizers_prefix toks) args
          , tokenizers_infix   = ins (tokenizers_infix toks) args
          , tokenizers_postfix = del (tokenizers_postfix toks) args
          }) <$> cf_args)
     <*> cf_body
    where
      -- The abstraction's arguments shadow unqualified prefix/postfix
      -- tokenizers of the same name...
      del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
      -- ...and are bound as plain variables in the infix table.
      ins = foldr $ \(n, _) ->
        Map.insertWith (<>) [] $
        Map.singleton n
          Tokenizer
            { tokenizer = \src -> Token_Term_Var src n
            , tokenizer_fixity = infixN5
            }
  g_term_let
   :: Inj_Token ss (->)
   => CF g (AST_Term src ss)
  g_term_let = rule "term_let" $
    metaG $
    (\name args bound body src ->
      BinTree0 $
      Token_Term_Let src name
        (foldr (\(x, ty_x) ->
          BinTree0 . Token_Term_Abst src x ty_x) bound args) body)
    <$  symbol "let"
    <*> g_term_name
    <*> many g_term_abst_decl
    <*  symbol "="
    <*> g_term
    <*  symbol "in"
    <*> g_term

deriving instance
  ( Gram_Term ss src g
  , Gram_Term_AtomsR src ss ss (CF g)
  ) => Gram_Term ss src (CF g)
instance
  ( Gram_Term_AtomsR src ss ss EBNF
  , Inj_Token ss (->)
  , Inj_Source (Text_of_Source src) src
  ) => Gram_Term ss src EBNF where
  tokenizers_get (CF (EBNF g)) = CF $ EBNF g
  tokenizers_put (CF (EBNF g)) = CF $ EBNF g
instance
  ( Gram_Term_AtomsR src ss ss RuleDef
  , Inj_Token ss (->)
  , Inj_Source (Text_of_Source src) src
  ) => Gram_Term ss src RuleDef where
  tokenizers_get (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
  tokenizers_put (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g

-- ** Class 'Gram_Term_AtomsR'
class Gram_Term_AtomsR src (ss::[*]) (rs::[*]) g where
  gs_term_atomsR :: Proxy rs -> [CF g (AST_Term src ss)]
instance Gram_Term_AtomsR src ss '[] g where
  gs_term_atomsR _rs = []
instance
  ( Gram_Term_AtomsT src ss t g
  , Gram_Term_AtomsR src ss rs g
  ) => Gram_Term_AtomsR src ss (t ': rs) g where
  gs_term_atomsR _ =
    gs_term_atomsT (Proxy @t) <>
    gs_term_atomsR (Proxy @rs)

-- ** Class 'Gram_Term_AtomsT'
class Gram_Term_AtomsT src ss t g where
  gs_term_atomsT :: Proxy t -> [CF g (AST_Term src ss)]
  gs_term_atomsT _t = []
-- instance Gram_Term_AtomsT src ss t RuleDef

gram_term
 :: forall g.
    ( Gram_Term '[Proxy (->), Proxy Integer] () g
    ) => [CF g ()]
gram_term =
  [ voiD g_term
  , voiD g_term_operators
  , voiD g_term_atom
  , voiD g_term_group
  , voiD g_term_abst
  , void (g_term_abst_decl :: CF g (TeName, AST_Type ()))
  , voiD g_term_let
  , void g_term_mod_name
  , void g_term_name
  , void g_term_idname
  , void $ cf_of_Terminal g_term_idname_tail
  , void $ cf_of_Reg g_term_keywords
  , void g_term_mod_opname
  , void g_term_opname
  , void $ cf_of_Terminal g_term_opname_ok
  , void $ cf_of_Reg g_term_keysyms
  ] where
  voiD :: CF g (AST_Term () '[Proxy (->), Proxy Integer]) -> CF g ()
  voiD = (() <$)