]> Git — Sourcephile - haskell/symantic.git/blob - symantic/Language/Symantic/Compiling/Grammar.hs
Massive rewrite to better support rank-1 polymorphic types.
[haskell/symantic.git] / symantic / Language / Symantic / Compiling / Grammar.hs
1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
3 {-# LANGUAGE ExistentialQuantification #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 module Language.Symantic.Compiling.Grammar where
8
9 import Control.Arrow (left)
10 import Control.Monad (void)
11 import Data.Map.Strict (Map)
12 import Data.Proxy (Proxy(..))
13 import Data.Semigroup (Semigroup(..))
14 import Data.String (IsString(..))
15 import Data.Text (Text)
16 import Prelude hiding (mod, not, any)
17 import qualified Data.Char as Char
18 import qualified Data.Function as Fun
19 import qualified Data.Map.Strict as Map
20 import qualified Data.Text as Text
21
22 import Language.Symantic.Grammar
23 import Language.Symantic.Typing
24 import Language.Symantic.Compiling.Term
25
26 -- * Type 'Mod'
27 data Mod a = Mod PathMod a
28 deriving (Eq, Functor, Ord, Show)
29
30 -- ** Type 'PathMod'
31 type PathMod = [NameMod]
32
33 -- ** Type 'NameMod'
34 newtype NameMod = NameMod Text
35 deriving (Eq, Ord, Show)
36
37 -- ** Type 'NameTe'
38 newtype NameTe = NameTe Text
39 deriving (Eq, Ord, Show)
40 instance IsString NameTe where
41 fromString = NameTe . fromString
42
43 -- * Type 'Modules'
44 data Modules src ss
45 = Modules
46 { modules_prefix :: Map PathMod (Map NameTe (Tokenizer Unifix src ss))
47 , modules_infix :: Map PathMod (Map NameTe (Tokenizer Infix src ss))
48 , modules_postfix :: Map PathMod (Map NameTe (Tokenizer Unifix src ss))
49 }
50
51 deriving instance
52 ( Show (Tokenizer Unifix src ss)
53 , Show (Tokenizer Infix src ss)
54 ) => Show (Modules src ss)
55 instance Semigroup (Modules src ss) where
56 x <> y =
57 Modules
58 { modules_prefix =
59 Map.unionWith (<>)
60 (modules_prefix x)
61 (modules_prefix y)
62 , modules_infix =
63 Map.unionWith (<>)
64 (modules_infix x)
65 (modules_infix y)
66 , modules_postfix =
67 Map.unionWith (<>)
68 (modules_postfix x)
69 (modules_postfix y)
70 }
71 instance Monoid (Modules src ss) where
72 mempty = Modules Map.empty Map.empty Map.empty
73 mappend = (<>)
74
75 -- ** Type 'Tokenizer'
76 data Tokenizer fixy src ss
77 = Tokenizer
78 { token_fixity :: fixy
79 , token_term :: src -> Token_Term src ss
80 }
81
82 -- ** Type 'Token_Term'
83 data Token_Term src ss
84 = Token_Term (TermVT_CF src ss)
85 | Token_Term_Abst src NameTe (AST_Type src) (AST_Term src ss)
86 | Token_Term_Var src NameTe
87 | Token_Term_Let src NameTe (AST_Term src ss) (AST_Term src ss)
88 | Token_Term_App src
89 deriving (Eq, Show)
90
91 -- ** Type 'AST_Term'
92 -- | /Abstract Syntax Tree/ of 'Token_Term'.
93 type AST_Term src ss = BinTree (Token_Term src ss)
94
95 -- * Class 'Inj_Modules'
96 type Inj_Modules src ss
97 = Inj_ModulesR src ss ss
98
99 inj_modules :: forall src ss. Inj_Modules src ss => Modules src ss
100 inj_modules = inj_modulesR (Proxy @ss)
101
102 -- ** Class 'Inj_ModulesR'
103 class Inj_ModulesR src (ss::[*]) (rs::[*]) where
104 inj_modulesR :: Proxy rs -> Modules src ss
105 instance Inj_ModulesR src ss '[] where
106 inj_modulesR _rs = mempty
107 instance
108 ( Module src ss s
109 , Inj_ModulesR src ss rs
110 ) => Inj_ModulesR src ss (Proxy s ': rs) where
111 inj_modulesR _ = inj_modulesR (Proxy @rs) <> module_ (Proxy @s)
112
113 -- | Lookup the given 'Mod' 'NameTe' into the given 'Modules',
114 -- returning for prefix, infix and postfix positions, when there is a match.
115 modulesLookup ::
116 forall src ss.
117 Mod NameTe ->
118 Modules src ss ->
119 ( Maybe (Tokenizer Unifix src ss)
120 , Maybe (Tokenizer Infix src ss)
121 , Maybe (Tokenizer Unifix src ss)
122 )
123 modulesLookup mn@(mod `Mod` n) (Modules pres ins posts) = do
124 let pre = Map.lookup mod pres >>= Map.lookup n
125 let post = Map.lookup mod posts >>= Map.lookup n
126 let in_ =
127 case mn of
128 [] `Mod` " " -> Just
129 Tokenizer
130 { token_term = Token_Term_App @src @ss
131 , token_fixity = Infix (Just AssocL) 9
132 }
133 _ -> Map.lookup mod ins >>= Map.lookup n
134 (pre, in_, post)
135
136 -- * Class 'Module'
137 class Module src ss s where
138 module_ :: Proxy s -> Modules src ss
139 module_ _t = mempty
140
141 moduleWhere ::
142 forall src ss.
143 Source src =>
144 PathMod -> [DefTerm src ss] -> Modules src ss
145 moduleWhere mod lst =
146 Modules
147 { modules_infix = mk $ \(n `WithFixity` fixy := t) ->
148 case fixy of
149 Fixity2 inf -> [(n, Tokenizer inf $ Token_Term . setSource (TermVT_CF t))]
150 _ -> []
151 , modules_prefix = mk $ \(n `WithFixity` fixy := t) ->
152 case fixy of
153 Fixity1 pre@Prefix{} -> [(n, Tokenizer pre $ Token_Term . setSource (TermVT_CF t))]
154 _ -> []
155 , modules_postfix = mk $ \(n `WithFixity` fixy := t) ->
156 case fixy of
157 Fixity1 post@Postfix{} -> [(n, Tokenizer post $ Token_Term . setSource (TermVT_CF t))]
158 _ -> []
159 }
160 where
161 mk ::
162 (DefTerm src ss -> [(NameTe, Tokenizer fixy src ss)]) ->
163 Map PathMod (Map NameTe (Tokenizer fixy src ss))
164 mk = Map.singleton mod . Map.fromList . (`foldMap` lst)
165
166 -- ** Type 'DefTerm'
167 data DefTerm src ss
168 = forall vs t.
169 (:=) (WithFixity NameTe)
170 (forall es. Term src ss es vs t)
171
172 -- ** Type 'WithFixity'
173 data WithFixity a
174 = WithFixity a Fixity
175 deriving (Eq, Show)
176 instance IsString (WithFixity NameTe) where
177 fromString a = WithFixity (fromString a) (Fixity2 infixN5)
178
179 withInfix :: a -> Infix -> WithFixity a
180 withInfix a inf = a `WithFixity` Fixity2 inf
181 withInfixR :: a -> Precedence -> WithFixity a
182 withInfixR a p = a `WithFixity` Fixity2 (infixR p)
183 withInfixL :: a -> Precedence -> WithFixity a
184 withInfixL a p = a `WithFixity` Fixity2 (infixL p)
185 withInfixN :: a -> Precedence -> WithFixity a
186 withInfixN a p = a `WithFixity` Fixity2 (infixN p)
187 withInfixB :: a -> (Side, Precedence) -> WithFixity a
188 withInfixB a (lr, p) = a `WithFixity` Fixity2 (infixB lr p)
189 withPrefix :: a -> Precedence -> WithFixity a
190 withPrefix a p = a `WithFixity` Fixity1 (Prefix p)
191 withPostfix :: a -> Precedence -> WithFixity a
192 withPostfix a p = a `WithFixity` Fixity1 (Postfix p)
193
194 -- * Class 'Gram_Name'
195 class
196 ( Gram_Terminal g
197 , Gram_Rule g
198 , Gram_Alt g
199 , Gram_Try g
200 , Gram_App g
201 , Gram_AltApp g
202 , Gram_RegL g
203 , Gram_CF g
204 , Gram_Comment g
205 , Gram_Op g
206 ) => Gram_Name g where
207 g_mod_path :: CF g PathMod
208 g_mod_path = rule "mod_path" $
209 infixrG
210 (pure <$> g_mod_name)
211 (op <$ char '.')
212 where op = (<>)
213 g_mod_name :: CF g NameMod
214 g_mod_name = rule "mod_name" $
215 (NameMod . Text.pack <$>) $
216 (identG `minus`) $
217 Fun.const
218 <$> g_term_keywords
219 <*. (any `but` g_term_idname_tail)
220 where
221 identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
222 headG = unicat $ Unicat Char.UppercaseLetter
223
224 g_term_mod_name :: CF g (Mod NameTe)
225 g_term_mod_name = rule "term_mod_name" $
226 lexeme $
227 g_term_mod_idname <+>
228 parens g_term_mod_opname
229 g_term_name :: CF g NameTe
230 g_term_name = rule "term_name" $
231 lexeme $
232 g_term_idname <+>
233 parens g_term_opname
234
235 g_term_mod_idname :: CF g (Mod NameTe)
236 g_term_mod_idname = rule "term_mod_idname" $
237 Mod
238 <$> option [] (try $ g_mod_path <* char '.')
239 <*> g_term_idname
240 g_term_idname :: CF g NameTe
241 g_term_idname = rule "term_idname" $
242 (NameTe . Text.pack <$>) $
243 (identG `minus`) $
244 Fun.const
245 <$> g_term_keywords
246 <*. (any `but` g_term_idname_tail)
247 where
248 identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
249 headG = unicat $ Unicat_Letter
250 g_term_idname_tail :: Terminal g Char
251 g_term_idname_tail = rule "term_idname_tail" $
252 unicat Unicat_Letter <+>
253 unicat Unicat_Number
254 g_term_keywords :: Reg rl g String
255 g_term_keywords = rule "term_keywords" $
256 choice $ string <$> ["in", "let"]
257
258 g_term_mod_opname :: CF g (Mod NameTe)
259 g_term_mod_opname = rule "term_mod_opname" $
260 Mod
261 <$> option [] (try $ g_mod_path <* char '.')
262 <*> g_term_opname
263 g_term_opname :: CF g NameTe
264 g_term_opname = rule "term_opname" $
265 (NameTe . Text.pack <$>) $
266 (symG `minus`) $
267 Fun.const
268 <$> g_term_keysyms
269 <*. (any `but` g_term_opname_ok)
270 where
271 symG = some $ cf_of_Terminal g_term_opname_ok
272 g_term_opname_ok :: Terminal g Char
273 g_term_opname_ok = rule "term_opname_ok" $
274 choice (unicat <$>
275 [ Unicat_Symbol
276 , Unicat_Punctuation
277 , Unicat_Mark
278 ]) `but` koG
279 where
280 koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
281 g_term_keysyms :: Reg rl g String
282 g_term_keysyms = rule "term_keysyms" $
283 choice $ string <$> ["\\", "->", "=", "@"]
284
285 deriving instance Gram_Name g => Gram_Name (CF g)
286 instance Gram_Name EBNF
287 instance Gram_Name RuleEBNF
288
289 -- * Class 'Gram_Term_Type'
290 class
291 ( Gram_Terminal g
292 , Gram_Rule g
293 , Gram_Meta src g
294 , Gram_Alt g
295 , Gram_AltApp g
296 , Gram_App g
297 , Gram_CF g
298 , Gram_Comment g
299 , Gram_Name g
300 , Gram_Type src g
301 ) => Gram_Term_Type src g where
302 g_term_abst_decl
303 :: CF g (NameTe, AST_Type src)
304 g_term_abst_decl = rule "term_abst_decl" $
305 parens $ (,)
306 <$> g_term_name
307 <* (symbol "::" <+> symbol ":")
308 -- NOTE: "::" is Haskell compatibility and ":" is another common notation.
309 <*> g_type
310
311 deriving instance Gram_Term_Type src g => Gram_Term_Type src (CF g)
312 instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src EBNF
313 instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src RuleEBNF
314
315 -- ** Type 'Error_Term_Gram'
316 data Error_Term_Gram
317 = Error_Term_Gram_Fixity Error_Fixity
318 | Error_Term_Gram_Fixity_Need FixityPos
319 | Error_Term_Gram_Term_incomplete
320 | Error_Term_Gram_Type_applied_to_nothing
321 | Error_Term_Gram_not_applicable
322 | Error_Term_Gram_application
323 | Error_Term_Gram_application_mismatch
324 deriving (Eq, Show)
325
326 -- *** Type 'FixityPos'
327 data FixityPos
328 = FixityPos_Prefix
329 | FixityPos_Infix
330 | FixityPos_Postfix
331 deriving (Eq, Show)
332
333 -- * Class 'Gram_Term'
334 class
335 ( Gram_Meta src g
336 , Gram_Error Error_Term_Gram g
337 , Gram_Terminal g
338 , Gram_Rule g
339 , Gram_Alt g
340 , Gram_App g
341 , Gram_AltApp g
342 , Gram_CF g
343 , Gram_Comment g
344 , Gram_Type src g
345 , Gram_Name g
346 , Gram_Term_Type src g
347 , Gram_Term_Atoms src ss g
348 , Show src
349 ) => Gram_Term src ss g where
350 modules_get :: CF g (Modules src ss -> a) -> CF g a
351 modules_put :: CF g (Modules src ss, a) -> CF g a
352 g_term :: CF g (AST_Term src ss)
353 g_term = rule "term" $
354 choice
355 [ try g_term_abst
356 , g_term_operators
357 , g_term_let
358 ]
359 g_term_operators :: CF g (AST_Term src ss)
360 g_term_operators = rule "term_operators" $
361 catch $
362 left Error_Term_Gram_Fixity <$>
363 g_ops
364 where
365 g_ops :: CF g (Either Error_Fixity (AST_Term src ss))
366 g_ops = operators g_term_atom g_prefix g_infix g_postfix
367 g_prefix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
368 g_infix :: CF g (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
369 g_postfix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
370 g_prefix = catch $ withMeta $ modules_get $ op_prefix <$> g_prefix_op
371 g_infix = catch $ withMeta $ modules_get $ op_infix <$> g_infix_op
372 g_postfix = catch $ withMeta $ modules_get $ op_postfix <$> g_postfix_op
373 op_infix
374 :: Mod NameTe
375 -> Modules src ss
376 -> src
377 -> Either Error_Term_Gram
378 (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
379 op_infix name toks src = do
380 let (_pre, in_, _post) = modulesLookup name toks
381 case in_ of
382 Nothing -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Infix
383 Just p ->
384 Right $ (token_fixity p,) $ \a b ->
385 (BinTree0 (token_term p src) `BinTree2` a) `BinTree2` b
386 op_prefix, op_postfix
387 :: Mod NameTe
388 -> Modules src ss
389 -> src
390 -> Either Error_Term_Gram
391 ( Unifix
392 , AST_Term src ss -> AST_Term src ss )
393 op_prefix name toks src = do
394 let (pre, _in_, _post) = modulesLookup name toks
395 case pre of
396 Nothing -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Prefix
397 Just p ->
398 Right $ (token_fixity p,) $ \a ->
399 BinTree0 (token_term p src) `BinTree2` a
400 op_postfix name toks src = do
401 let (_pre, _in_, post) = modulesLookup name toks
402 case post of
403 Nothing -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Postfix
404 Just p ->
405 Right $ (token_fixity p,) $ \a ->
406 BinTree0 (token_term p src) `BinTree2` a
407 g_postfix_op :: CF g (Mod NameTe)
408 g_postfix_op = rule "term_op_postfix" $
409 lexeme $
410 g_backquote *> g_term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
411 g_term_mod_opname
412 g_infix_op :: CF g (Mod NameTe)
413 g_infix_op = rule "term_op_infix" $
414 lexeme $
415 between g_backquote g_backquote g_term_mod_idname <+>
416 try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
417 pure (Mod [] " ")
418 g_prefix_op :: CF g (Mod NameTe)
419 g_prefix_op = rule "term_op_prefix" $
420 lexeme $
421 g_term_mod_idname <* g_backquote <+>
422 g_term_mod_opname
423 g_backquote :: Gram_Terminal g' => g' Char
424 g_backquote = char '`'
425
426 g_term_atom :: CF g (AST_Term src ss)
427 g_term_atom = rule "term_atom" $
428 choice $
429 {-(try (
430 withMeta $
431 (\typ src -> BinTree0 $ inj_EToken src $ Token_Term_Type typ)
432 <$ char '@' <*> g_type) :) $
433 -}
434 (try <$> g_term_atomsR (Proxy @ss)) <>
435 [ try $
436 catch $ withMeta $ modules_get $
437 (\mn toks src -> do
438 let (_, in_, _) = modulesLookup mn toks
439 case in_ of
440 Just p -> Right $ BinTree0 $ token_term p src
441 Nothing ->
442 case mn of
443 [] `Mod` n -> Right $ BinTree0 $ Token_Term_Var src n
444 _ -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Infix
445 ) <$> g_term_mod_name
446 , g_term_group
447 ]
448 g_term_group :: CF g (AST_Term src ss)
449 g_term_group = rule "term_group" $ parens g_term
450 g_term_abst :: CF g (AST_Term src ss)
451 g_term_abst = rule "term_abst" $
452 withMeta $
453 ((\(xs, te) src ->
454 foldr (\(x, ty_x) ->
455 BinTree0 . Token_Term_Abst src x ty_x) te xs) <$>) $
456 g_term_abst_args_body
457 (symbol "\\" *> some g_term_abst_decl <* symbol "->")
458 g_term
459 g_term_abst_args_body
460 :: CF g [(NameTe, AST_Type src)]
461 -> CF g (AST_Term src ss)
462 -> CF g ([(NameTe, AST_Type src)], AST_Term src ss)
463 -- g_term_abst_args_body args body = (,) <$> args <*> body
464 g_term_abst_args_body cf_args cf_body =
465 modules_put $ modules_get $
466 (\a b (toks::Modules src ss) -> (toks, (a, b)))
467 <$> (modules_put $ modules_get $
468 (\args (toks::Modules src ss) -> (,args)
469 Modules
470 { modules_prefix = del (modules_prefix toks) args
471 , modules_infix = ins (modules_infix toks) args
472 , modules_postfix = del (modules_postfix toks) args
473 }) <$> cf_args)
474 <*> cf_body
475 where
476 del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
477 ins = foldr $ \(n, _) ->
478 Map.insertWith (<>) [] $
479 Map.singleton n
480 Tokenizer
481 { token_term = \src -> Token_Term_Var src n
482 , token_fixity = infixN5
483 }
484 g_term_let :: CF g (AST_Term src ss)
485 g_term_let = rule "term_let" $
486 withMeta $
487 (\name args bound body src ->
488 BinTree0 $
489 Token_Term_Let src name
490 (foldr (\(x, ty_x) ->
491 BinTree0 . Token_Term_Abst src x ty_x) bound args) body)
492 <$ symbol "let"
493 <*> g_term_name
494 <*> many g_term_abst_decl
495 <* symbol "="
496 <*> g_term
497 <* symbol "in"
498 <*> g_term
499
500 deriving instance
501 ( Gram_Term src ss g
502 , Gram_Term_Atoms src ss (CF g)
503 , Show src
504 ) => Gram_Term src ss (CF g)
505 instance
506 ( Gram_Term_Atoms src ss EBNF
507 , Inj_Source (Text_of_Source src) src
508 , Show src
509 ) => Gram_Term src ss EBNF where
510 modules_get (CF (EBNF g)) = CF $ EBNF g
511 modules_put (CF (EBNF g)) = CF $ EBNF g
512 instance
513 ( Gram_Term_Atoms src ss RuleEBNF
514 , Inj_Source (Text_of_Source src) src
515 , Show src
516 ) => Gram_Term src ss RuleEBNF where
517 modules_get (CF (RuleEBNF (EBNF g))) = CF $ RuleEBNF $ EBNF g
518 modules_put (CF (RuleEBNF (EBNF g))) = CF $ RuleEBNF $ EBNF g
519
520 -- ** Class 'Gram_Term_Atoms'
521 type Gram_Term_Atoms src ss g = Gram_Term_AtomsR src ss ss g
522
523 -- *** Class 'Gram_Term_AtomsR'
524 class Gram_Term_AtomsR src (ss::[*]) (rs::[*]) g where
525 g_term_atomsR :: Proxy rs -> [CF g (AST_Term src ss)]
526 instance Gram_Term_AtomsR src ss '[] g where
527 g_term_atomsR _rs = []
528 instance
529 ( Gram_Term_AtomsFor src ss g t
530 , Gram_Term_AtomsR src ss rs g
531 ) => Gram_Term_AtomsR src ss (Proxy t ': rs) g where
532 g_term_atomsR _ =
533 g_term_atomsFor (Proxy @t) <>
534 g_term_atomsR (Proxy @rs)
535
536 -- *** Class 'Gram_Term_AtomsFor'
537 class Gram_Term_AtomsFor src ss g t where
538 g_term_atomsFor :: Proxy t -> [CF g (AST_Term src ss)]
539 g_term_atomsFor _t = []
540
541 gram_term
542 :: forall g.
543 ( Gram_Term () '[Proxy (->), Proxy Integer] g
544 ) => [CF g ()]
545 gram_term =
546 [ voiD g_term
547 , voiD g_term_operators
548 , voiD g_term_atom
549 , voiD g_term_group
550 , voiD g_term_abst
551 , void (g_term_abst_decl::CF g (NameTe, AST_Type ()))
552 , voiD g_term_let
553 , void g_term_mod_name
554 , void g_term_name
555 , void g_term_idname
556 , void $ cf_of_Terminal g_term_idname_tail
557 , void $ cf_of_Reg g_term_keywords
558 , void g_term_mod_opname
559 , void g_term_opname
560 , void $ cf_of_Terminal g_term_opname_ok
561 , void $ cf_of_Reg g_term_keysyms
562 ] where
563 voiD :: CF g (AST_Term () '[Proxy (->), Proxy Integer]) -> CF g ()
564 voiD = (() <$)