]> Git — Sourcephile - haskell/symantic.git/blob - symantic/Language/Symantic/Compiling/Grammar.hs
Improve handling of metadata in grammars.
[haskell/symantic.git] / symantic / Language / Symantic / Compiling / Grammar.hs
1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
3 {-# LANGUAGE ExistentialQuantification #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 module Language.Symantic.Compiling.Grammar where
8
9 import Control.Arrow (left)
10 import Control.Monad (void)
11 import Data.Map.Strict (Map)
12 import Data.Proxy (Proxy(..))
13 import Data.Semigroup (Semigroup(..))
14 import Data.String (IsString(..))
15 import Data.Text (Text)
16 import Prelude hiding (mod, not, any)
17 import qualified Data.Char as Char
18 import qualified Data.Function as Fun
19 import qualified Data.Map.Strict as Map
20 import qualified Data.Text as Text
21
22 import Language.Symantic.Grammar
23 import Language.Symantic.Typing
24 import Language.Symantic.Compiling.Term
25
26 -- * Type 'Mod'
27 data Mod a = Mod PathMod a
28 deriving (Eq, Functor, Ord, Show)
29
30 -- ** Type 'PathMod'
31 type PathMod = [NameMod]
32
33 -- ** Type 'NameMod'
34 newtype NameMod = NameMod Text
35 deriving (Eq, Ord, Show)
36
37 -- ** Type 'NameTe'
38 newtype NameTe = NameTe Text
39 deriving (Eq, Ord, Show)
40 instance IsString NameTe where
41 fromString = NameTe . fromString
42
43 -- * Type 'Modules'
44 data Modules src ss
45 = Modules
46 { modules_prefix :: Map PathMod (Map NameTe (Tokenizer Unifix src ss))
47 , modules_infix :: Map PathMod (Map NameTe (Tokenizer Infix src ss))
48 , modules_postfix :: Map PathMod (Map NameTe (Tokenizer Unifix src ss))
49 }
50
51 deriving instance
52 ( Show (Tokenizer Unifix src ss)
53 , Show (Tokenizer Infix src ss)
54 ) => Show (Modules src ss)
55 instance Semigroup (Modules src ss) where
56 x <> y =
57 Modules
58 { modules_prefix =
59 Map.unionWith (<>)
60 (modules_prefix x)
61 (modules_prefix y)
62 , modules_infix =
63 Map.unionWith (<>)
64 (modules_infix x)
65 (modules_infix y)
66 , modules_postfix =
67 Map.unionWith (<>)
68 (modules_postfix x)
69 (modules_postfix y)
70 }
71 instance Monoid (Modules src ss) where
72 mempty = Modules Map.empty Map.empty Map.empty
73 mappend = (<>)
74
75 -- ** Type 'Tokenizer'
76 data Tokenizer fixy src ss
77 = Tokenizer
78 { token_fixity :: fixy
79 , token_term :: src -> Token_Term src ss
80 }
81
82 -- ** Type 'Token_Term'
83 data Token_Term src ss
84 = Token_Term (TermVT_CF src ss)
85 | Token_Term_Abst src NameTe (AST_Type src) (AST_Term src ss)
86 | Token_Term_Var src NameTe
87 | Token_Term_Let src NameTe (AST_Term src ss) (AST_Term src ss)
88 | Token_Term_App src
89 deriving (Eq, Show)
90
91 -- ** Type 'AST_Term'
92 -- | /Abstract Syntax Tree/ of 'Token_Term'.
93 type AST_Term src ss = BinTree (Token_Term src ss)
94
95 -- * Class 'Inj_Modules'
96 type Inj_Modules src ss
97 = Inj_ModulesR src ss ss
98
99 inj_Modules :: forall src ss. Inj_Modules src ss => Modules src ss
100 inj_Modules = inj_ModulesR (Proxy @ss)
101
102 -- ** Class 'Inj_ModulesR'
103 class Inj_ModulesR src (ss::[*]) (rs::[*]) where
104 inj_ModulesR :: Proxy rs -> Modules src ss
105 instance Inj_ModulesR src ss '[] where
106 inj_ModulesR _rs = mempty
107 instance
108 ( Module src ss s
109 , Inj_ModulesR src ss rs
110 ) => Inj_ModulesR src ss (Proxy s ': rs) where
111 inj_ModulesR _ = inj_ModulesR (Proxy @rs) <> module_ (Proxy @s)
112
113 -- | Lookup the given 'Mod' 'NameTe' into the given 'Modules',
114 -- returning for prefix, infix and postfix positions, when there is a match.
115 modulesLookup ::
116 forall src ss.
117 Mod NameTe ->
118 Modules src ss ->
119 ( Maybe (Tokenizer Unifix src ss)
120 , Maybe (Tokenizer Infix src ss)
121 , Maybe (Tokenizer Unifix src ss)
122 )
123 modulesLookup mn@(mod `Mod` n) (Modules pres ins posts) = do
124 let pre = Map.lookup mod pres >>= Map.lookup n
125 let post = Map.lookup mod posts >>= Map.lookup n
126 let in_ =
127 case mn of
128 [] `Mod` " " -> Just
129 Tokenizer
130 { token_term = Token_Term_App @src @ss
131 , token_fixity = Infix (Just AssocL) 9
132 }
133 _ -> Map.lookup mod ins >>= Map.lookup n
134 (pre, in_, post)
135
136 -- * Class 'Module'
137 class Module src ss s where
138 module_ :: Proxy s -> Modules src ss
139 module_ _t = mempty
140
141 moduleWhere ::
142 forall src ss.
143 Source src =>
144 PathMod -> [DefTerm src ss] -> Modules src ss
145 moduleWhere mod lst =
146 Modules
147 { modules_infix = mk $ \(n `WithFixity` fixy := t) ->
148 case fixy of
149 Fixity2 inf -> [(n, Tokenizer inf $ Token_Term . setSource (TermVT_CF t))]
150 _ -> []
151 , modules_prefix = mk $ \(n `WithFixity` fixy := t) ->
152 case fixy of
153 Fixity1 pre@Prefix{} -> [(n, Tokenizer pre $ Token_Term . setSource (TermVT_CF t))]
154 _ -> []
155 , modules_postfix = mk $ \(n `WithFixity` fixy := t) ->
156 case fixy of
157 Fixity1 post@Postfix{} -> [(n, Tokenizer post $ Token_Term . setSource (TermVT_CF t))]
158 _ -> []
159 }
160 where
161 mk ::
162 (DefTerm src ss -> [(NameTe, Tokenizer fixy src ss)]) ->
163 Map PathMod (Map NameTe (Tokenizer fixy src ss))
164 mk = Map.singleton mod . Map.fromList . (`foldMap` lst)
165
166 -- ** Type 'DefTerm'
167 data DefTerm src ss
168 = forall vs t.
169 (:=) (WithFixity NameTe)
170 (forall es. Term src ss es vs t)
171
172 -- ** Type 'WithFixity'
173 data WithFixity a
174 = WithFixity a Fixity
175 deriving (Eq, Show)
176 instance IsString (WithFixity NameTe) where
177 fromString a = WithFixity (fromString a) (Fixity2 infixN5)
178
179 withInfix :: a -> Infix -> WithFixity a
180 withInfix a inf = a `WithFixity` Fixity2 inf
181 withInfixR :: a -> Precedence -> WithFixity a
182 withInfixR a p = a `WithFixity` Fixity2 (infixR p)
183 withInfixL :: a -> Precedence -> WithFixity a
184 withInfixL a p = a `WithFixity` Fixity2 (infixL p)
185 withInfixN :: a -> Precedence -> WithFixity a
186 withInfixN a p = a `WithFixity` Fixity2 (infixN p)
187 withInfixB :: a -> (Side, Precedence) -> WithFixity a
188 withInfixB a (lr, p) = a `WithFixity` Fixity2 (infixB lr p)
189 withPrefix :: a -> Precedence -> WithFixity a
190 withPrefix a p = a `WithFixity` Fixity1 (Prefix p)
191 withPostfix :: a -> Precedence -> WithFixity a
192 withPostfix a p = a `WithFixity` Fixity1 (Postfix p)
193
194 -- * Class 'Gram_Name'
195 class
196 ( Gram_Terminal g
197 , Gram_Rule g
198 , Gram_Alt g
199 , Gram_Try g
200 , Gram_App g
201 , Gram_AltApp g
202 , Gram_RegL g
203 , Gram_CF g
204 , Gram_Comment g
205 , Gram_Op g
206 ) => Gram_Name g where
207 g_mod_path :: CF g PathMod
208 g_mod_path = rule "mod_path" $
209 infixrG
210 (pure <$> g_mod_name)
211 (op <$ char '.')
212 where op = (<>)
213 g_mod_name :: CF g NameMod
214 g_mod_name = rule "mod_name" $
215 (NameMod . Text.pack <$>) $
216 (identG `minus`) $
217 Fun.const
218 <$> g_term_keywords
219 <*. (any `but` g_term_idname_tail)
220 where
221 identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
222 headG = unicat $ Unicat Char.UppercaseLetter
223
224 g_term_mod_name :: CF g (Mod NameTe)
225 g_term_mod_name = rule "term_mod_name" $
226 lexeme $
227 g_term_mod_idname <+>
228 parens g_term_mod_opname
229 g_term_name :: CF g NameTe
230 g_term_name = rule "term_name" $
231 lexeme $
232 g_term_idname <+>
233 parens g_term_opname
234
235 g_term_mod_idname :: CF g (Mod NameTe)
236 g_term_mod_idname = rule "term_mod_idname" $
237 Mod
238 <$> option [] (try $ g_mod_path <* char '.')
239 <*> g_term_idname
240 g_term_idname :: CF g NameTe
241 g_term_idname = rule "term_idname" $
242 (NameTe . Text.pack <$>) $
243 (identG `minus`) $
244 Fun.const
245 <$> g_term_keywords
246 <*. (any `but` g_term_idname_tail)
247 where
248 identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
249 headG = unicat $ Unicat_Letter
250 g_term_idname_tail :: Terminal g Char
251 g_term_idname_tail = rule "term_idname_tail" $
252 unicat Unicat_Letter <+>
253 unicat Unicat_Number
254 g_term_keywords :: Reg rl g String
255 g_term_keywords = rule "term_keywords" $
256 choice $ string <$> ["in", "let"]
257
258 g_term_mod_opname :: CF g (Mod NameTe)
259 g_term_mod_opname = rule "term_mod_opname" $
260 Mod
261 <$> option [] (try $ g_mod_path <* char '.')
262 <*> g_term_opname
263 g_term_opname :: CF g NameTe
264 g_term_opname = rule "term_opname" $
265 (NameTe . Text.pack <$>) $
266 (symG `minus`) $
267 Fun.const
268 <$> g_term_keysyms
269 <*. (any `but` g_term_opname_ok)
270 where
271 symG = some $ cf_of_Terminal g_term_opname_ok
272 g_term_opname_ok :: Terminal g Char
273 g_term_opname_ok = rule "term_opname_ok" $
274 choice (unicat <$>
275 [ Unicat_Symbol
276 , Unicat_Punctuation
277 , Unicat_Mark
278 ]) `but` koG
279 where
280 koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
281 g_term_keysyms :: Reg rl g String
282 g_term_keysyms = rule "term_keysyms" $
283 choice $ string <$> ["\\", "->", "=", "@"]
284
285 deriving instance Gram_Name g => Gram_Name (CF g)
286 instance Gram_Name EBNF
287 instance Gram_Name RuleEBNF
288
289 -- * Class 'Gram_Term_Type'
290 class
291 ( Gram_Terminal g
292 , Gram_Rule g
293 , Gram_Alt g
294 , Gram_AltApp g
295 , Gram_App g
296 , Gram_CF g
297 , Gram_Comment g
298 , Gram_Name g
299 , Gram_Type src g
300 ) => Gram_Term_Type src g where
301 g_term_abst_decl
302 :: CF g (NameTe, AST_Type src)
303 g_term_abst_decl = rule "term_abst_decl" $
304 parens $ (,)
305 <$> g_term_name
306 <* (symbol "::" <+> symbol ":")
307 -- NOTE: "::" is Haskell compatibility and ":" is another common notation.
308 <*> g_type
309
310 deriving instance Gram_Term_Type src g => Gram_Term_Type src (CF g)
311 instance Gram_Source src EBNF => Gram_Term_Type src EBNF
312 instance Gram_Source src RuleEBNF => Gram_Term_Type src RuleEBNF
313
314 -- ** Type 'Error_Term_Gram'
315 data Error_Term_Gram
316 = Error_Term_Gram_Fixity Error_Fixity
317 | Error_Term_Gram_Fixity_Need FixityPos
318 | Error_Term_Gram_Term_incomplete
319 | Error_Term_Gram_Type_applied_to_nothing
320 | Error_Term_Gram_not_applicable
321 | Error_Term_Gram_application
322 | Error_Term_Gram_application_mismatch
323 deriving (Eq, Show)
324
325 -- *** Type 'FixityPos'
326 data FixityPos
327 = FixityPos_Prefix
328 | FixityPos_Infix
329 | FixityPos_Postfix
330 deriving (Eq, Show)
331
332 -- * Class 'Gram_Term'
333 class
334 ( Gram_Source src g
335 , Gram_Error Error_Term_Gram g
336 , Gram_Terminal g
337 , Gram_Rule g
338 , Gram_Alt g
339 , Gram_App g
340 , Gram_AltApp g
341 , Gram_CF g
342 , Gram_Comment g
343 , Gram_Type src g
344 , Gram_Name g
345 , Gram_Term_Type src g
346 , Gram_Term_Atoms src ss g
347 , Gram_State (Modules src ss) g
348 ) => Gram_Term src ss g where
349 -- getModules :: CF g (Modules src ss -> a) -> CF g a
350 -- setModules :: CF g (Modules src ss, a) -> CF g a
351 g_term :: CF g (AST_Term src ss)
352 g_term = rule "term" $
353 choice
354 [ try g_term_abst
355 , g_term_operators
356 , g_term_let
357 ]
358 g_term_operators :: CF g (AST_Term src ss)
359 g_term_operators = rule "term_operators" $
360 g_catch $
361 left Error_Term_Gram_Fixity <$>
362 g_ops
363 where
364 g_ops :: CF g (Either Error_Fixity (AST_Term src ss))
365 g_ops = operators g_term_atom g_prefix g_infix g_postfix
366 g_prefix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
367 g_infix :: CF g (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
368 g_postfix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
369 g_prefix = g_catch $ g_source $ g_get_after $ op_prefix <$> g_prefix_op
370 g_infix = g_catch $ g_source $ g_get_after $ op_infix <$> g_infix_op
371 g_postfix = g_catch $ g_source $ g_get_after $ op_postfix <$> g_postfix_op
372 op_infix
373 :: Mod NameTe
374 -> Modules src ss
375 -> src
376 -> Either Error_Term_Gram
377 (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
378 op_infix name toks src = do
379 let (_pre, in_, _post) = modulesLookup name toks
380 case in_ of
381 Nothing -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Infix
382 Just p ->
383 Right $ (token_fixity p,) $ \a b ->
384 (BinTree0 (token_term p src) `BinTree2` a) `BinTree2` b
385 op_prefix, op_postfix
386 :: Mod NameTe
387 -> Modules src ss
388 -> src
389 -> Either Error_Term_Gram
390 ( Unifix
391 , AST_Term src ss -> AST_Term src ss )
392 op_prefix name toks src = do
393 let (pre, _in_, _post) = modulesLookup name toks
394 case pre of
395 Nothing -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Prefix
396 Just p ->
397 Right $ (token_fixity p,) $ \a ->
398 BinTree0 (token_term p src) `BinTree2` a
399 op_postfix name toks src = do
400 let (_pre, _in_, post) = modulesLookup name toks
401 case post of
402 Nothing -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Postfix
403 Just p ->
404 Right $ (token_fixity p,) $ \a ->
405 BinTree0 (token_term p src) `BinTree2` a
406 g_postfix_op :: CF g (Mod NameTe)
407 g_postfix_op = rule "term_op_postfix" $
408 lexeme $
409 g_backquote *> g_term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
410 g_term_mod_opname
411 g_infix_op :: CF g (Mod NameTe)
412 g_infix_op = rule "term_op_infix" $
413 lexeme $
414 between g_backquote g_backquote g_term_mod_idname <+>
415 try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
416 pure (Mod [] " ")
417 g_prefix_op :: CF g (Mod NameTe)
418 g_prefix_op = rule "term_op_prefix" $
419 lexeme $
420 g_term_mod_idname <* g_backquote <+>
421 g_term_mod_opname
422 g_backquote :: Gram_Terminal g' => g' Char
423 g_backquote = char '`'
424
425 g_term_atom :: CF g (AST_Term src ss)
426 g_term_atom = rule "term_atom" $
427 choice $
428 {-(try (
429 g_source $
430 (\typ src -> BinTree0 $ inj_EToken src $ Token_Term_Type typ)
431 <$ char '@' <*> g_type) :) $
432 -}
433 (try <$> g_term_atomsR (Proxy @ss)) <>
434 [ try $
435 g_catch $ g_source $ g_get_after $
436 (\mn toks src -> do
437 let (_, in_, _) = modulesLookup mn toks
438 case in_ of
439 Just p -> Right $ BinTree0 $ token_term p src
440 Nothing ->
441 case mn of
442 [] `Mod` n -> Right $ BinTree0 $ Token_Term_Var src n
443 _ -> Left $ Error_Term_Gram_Fixity_Need FixityPos_Infix
444 ) <$> g_term_mod_name
445 , g_term_group
446 ]
447 g_term_group :: CF g (AST_Term src ss)
448 g_term_group = rule "term_group" $ parens g_term
449 g_term_abst :: CF g (AST_Term src ss)
450 g_term_abst = rule "term_abst" $
451 g_source $
452 ((\(xs, te) src ->
453 foldr (\(x, ty_x) ->
454 BinTree0 . Token_Term_Abst src x ty_x) te xs) <$>) $
455 g_term_abst_args_body
456 (symbol "\\" *> some g_term_abst_decl <* symbol "->")
457 g_term
458 g_term_abst_args_body
459 :: CF g [(NameTe, AST_Type src)]
460 -> CF g (AST_Term src ss)
461 -> CF g ([(NameTe, AST_Type src)], AST_Term src ss)
462 -- g_term_abst_args_body args body = (,) <$> args <*> body
463 g_term_abst_args_body cf_args cf_body =
464 g_state_before $
465 (\a b (toks::Modules src ss) -> (toks, (a, b)))
466 <$> (g_state_after $
467 (\args (toks::Modules src ss) -> (,args)
468 Modules
469 { modules_prefix = del (modules_prefix toks) args
470 , modules_infix = ins (modules_infix toks) args
471 , modules_postfix = del (modules_postfix toks) args
472 }) <$> cf_args)
473 <*> cf_body
474 where
475 del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
476 ins = foldr $ \(n, _) ->
477 Map.insertWith (<>) [] $
478 Map.singleton n
479 Tokenizer
480 { token_term = \src -> Token_Term_Var src n
481 , token_fixity = infixN5
482 }
483 g_term_let :: CF g (AST_Term src ss)
484 g_term_let = rule "term_let" $
485 g_source $
486 (\name args bound body src ->
487 BinTree0 $
488 Token_Term_Let src name
489 (foldr (\(x, ty_x) ->
490 BinTree0 . Token_Term_Abst src x ty_x) bound args) body)
491 <$ symbol "let"
492 <*> g_term_name
493 <*> many g_term_abst_decl
494 <* symbol "="
495 <*> g_term
496 <* symbol "in"
497 <*> g_term
498
499 deriving instance
500 ( Gram_Term src ss g
501 , Gram_Term_Atoms src ss (CF g)
502 ) => Gram_Term src ss (CF g)
503 instance
504 ( Gram_Term_Atoms src ss EBNF
505 , Gram_Source src EBNF
506 ) => Gram_Term src ss EBNF
507 instance
508 ( Gram_Term_Atoms src ss RuleEBNF
509 , Gram_Source src RuleEBNF
510 ) => Gram_Term src ss RuleEBNF
511
512 -- ** Class 'Gram_Term_Atoms'
513 type Gram_Term_Atoms src ss g = Gram_Term_AtomsR src ss ss g
514
515 -- *** Class 'Gram_Term_AtomsR'
516 class Gram_Term_AtomsR src (ss::[*]) (rs::[*]) g where
517 g_term_atomsR :: Proxy rs -> [CF g (AST_Term src ss)]
518 instance Gram_Term_AtomsR src ss '[] g where
519 g_term_atomsR _rs = []
520 instance
521 ( Gram_Term_AtomsFor src ss g t
522 , Gram_Term_AtomsR src ss rs g
523 ) => Gram_Term_AtomsR src ss (Proxy t ': rs) g where
524 g_term_atomsR _ =
525 g_term_atomsFor (Proxy @t) <>
526 g_term_atomsR (Proxy @rs)
527
528 -- *** Class 'Gram_Term_AtomsFor'
529 class Gram_Term_AtomsFor src ss g t where
530 g_term_atomsFor :: Proxy t -> [CF g (AST_Term src ss)]
531 g_term_atomsFor _t = []
532
533 gram_term
534 :: forall g.
535 ( Gram_Term () '[Proxy (->), Proxy Integer] g
536 ) => [CF g ()]
537 gram_term =
538 [ voiD g_term
539 , voiD g_term_operators
540 , voiD g_term_atom
541 , voiD g_term_group
542 , voiD g_term_abst
543 , void (g_term_abst_decl::CF g (NameTe, AST_Type ()))
544 , voiD g_term_let
545 , void g_term_mod_name
546 , void g_term_name
547 , void g_term_idname
548 , void $ cf_of_Terminal g_term_idname_tail
549 , void $ cf_of_Reg g_term_keywords
550 , void g_term_mod_opname
551 , void g_term_opname
552 , void $ cf_of_Terminal g_term_opname_ok
553 , void $ cf_of_Reg g_term_keysyms
554 ] where
555 voiD :: CF g (AST_Term () '[Proxy (->), Proxy Integer]) -> CF g ()
556 voiD = (() <$)