{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.Symantic.Compiling.Term.Grammar where

import Control.Arrow (left)
import Control.Monad (foldM, void, (=<<))
import qualified Data.Char as Char
import qualified Data.Function as Fun
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.Text (Text)
import qualified Data.Text as Text
import Prelude hiding (mod, not, any)

import Language.Symantic.Parsing
import Language.Symantic.Typing

-- * Type 'Term_Name'
newtype Term_Name = Term_Name Text
  deriving (Eq, Ord, Show)

-- * Type 'ProTok'
-- | Proto 'EToken'. It's almost like a free monad,
-- but has a third constructor ('ProTokPi')
-- to require a type argument.
--
-- NOTE: this type may one day be removed
-- if proper type inference is done.
-- In the meantime it is used to require
-- the term or type arguments needed to build
-- the 'EToken's of polymorphic terms.
data ProTok meta ts
  = ProTokLam (EToken meta ts -> ProTok meta ts)
    -- ^ Require a term argument.
  | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
    -- ^ Require a type argument.
  | ProTok (EToken meta ts)
    -- ^ No need for any argument.

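-- NOTE: as an intuition, a 'ProTok' for a polymorphic term that first
-- needs a type argument and then a term argument would be built roughly
-- as in the following sketch (the name @protok_id@ is hypothetical,
-- not part of this module):
--
-- @
-- protok_id :: meta -> ProTok meta ts
-- protok_id meta =
--   ProTokPi $ \_ty ->   -- first require a type argument
--   ProTokLam $ \te ->   -- then require a term argument
--   ProTok te            -- finally yield a complete 'EToken'
-- @
--
-- 'protok_app' below peels off these constructors one argument at a time.
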
-- | Declared here and not in @Compiling.Lambda@
-- to be able to use 'Token_Term_Var' in 'protok'.
data instance TokenT meta (ts::[*]) (Proxy (->))
  = Token_Term_Abst Term_Name (EToken meta '[Proxy Token_Type]) (EToken meta ts)
  | Token_Term_App (EToken meta ts) (EToken meta ts)
  | Token_Term_Let Term_Name (EToken meta ts) (EToken meta ts)
  | Token_Term_Var Term_Name
  | Token_Term_Compose (EToken meta ts) (EToken meta ts)

-- * Class 'Tokenize'
type Tokenize meta ts
  = TokenizeR meta ts ts

-- ** Type 'Tokenizers'
data Tokenizers meta ts
  = Tokenizers
    { tokenizers_prefix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
    , tokenizers_infix :: Map Mod_Path (Map Term_Name (Term_ProTok Infix meta ts))
    , tokenizers_postfix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
    }
instance Semigroup (Tokenizers meta ts) where
  x <> y =
    Tokenizers
      (Map.unionWith Map.union
        (tokenizers_prefix x)
        (tokenizers_prefix y))
      (Map.unionWith Map.union
        (tokenizers_infix x)
        (tokenizers_infix y))
      (Map.unionWith Map.union
        (tokenizers_postfix x)
        (tokenizers_postfix y))
instance Monoid (Tokenizers meta ts) where
  mempty = Tokenizers Map.empty Map.empty Map.empty
  mappend = (<>)

-- ** Type 'Term_ProTok'
data Term_ProTok fixy meta ts
  = Term_ProTok
    { term_protok :: meta -> ProTok meta ts
    , term_fixity :: fixy
    }

tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
tokenizers = tokenizeR (Proxy @ts)

unProTok
  :: ProTok meta ts
  -> Either Error_Term_Gram (EToken meta ts)
unProTok (ProTok t) = Right t
unProTok _ = Left Error_Term_Gram_Term_incomplete

protok
  :: Inj_Token meta ts (->)
  => Mod Term_Name
  -> Tokenizers meta ts
  -> Either Error_Term_Gram
     ( Maybe (Term_ProTok Unifix meta ts)
     , Term_ProTok Infix meta ts
     , Maybe (Term_ProTok Unifix meta ts)
     )
protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
  let pre = Map.lookup mod pres >>= Map.lookup tn
  let post = Map.lookup mod posts >>= Map.lookup tn
  in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
  return (pre, in_, post)
  where
    var_or_err (Just x) = Right x
    var_or_err Nothing =
      case mod of
        [] -> Right $ var infixN5
        _ -> Left $ Error_Term_Gram_Undefined_term
    var term_fixity =
      Term_ProTok
        { term_protok = \meta -> ProTok $ inj_EToken meta $ Token_Term_Var tn
        , term_fixity
        }

protok_app
  :: Inj_Token meta ts (->)
  => ProTok meta ts
  -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
  -> Either Error_Term_Gram (ProTok meta ts)
protok_app =
  foldM app
  where
    app acc (Left typ) =
      case acc of
        ProTokPi g -> Right $ g typ
        _ -> Left Error_Term_Gram_Cannot_apply_type
    app acc (Right te) =
      case acc of
        ProTokLam f -> Right $ f te
        ProTok tok@(EToken e) -> Right $
          ProTok $ inj_EToken (meta_of e) $
            Token_Term_App tok te
        _ -> Left Error_Term_Gram_Cannot_apply_term

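-- NOTE: a sketch of how 'protok_app' is used: given a 'ProTok' of the
-- shape @ProTokPi $ \ty -> ProTokLam $ \x -> …@, applying first a type
-- and then a term argument looks like this (the names @tok_f@, @ty_a@
-- and @te_x@ are hypothetical):
--
-- @
-- protok_app tok_f [Left ty_a, Right te_x]
-- @
--
-- Each 'Left' must meet a 'ProTokPi', and each 'Right' a 'ProTokLam'
-- (or an already complete 'ProTok', in which case a 'Token_Term_App'
-- is injected); any mismatch is reported as
-- 'Error_Term_Gram_Cannot_apply_type' or 'Error_Term_Gram_Cannot_apply_term'.
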
-- ** Class 'TokenizeR'
class TokenizeR meta (ts::[*]) (rs::[*]) where
  tokenizeR :: Proxy rs -> Tokenizers meta ts
instance TokenizeR meta ts '[] where
  tokenizeR _rs = mempty
instance
  ( TokenizeT meta ts t
  , TokenizeR meta ts rs
  ) => TokenizeR meta ts (t ': rs) where
  tokenizeR _ =
    tokenizeR (Proxy @rs) <>
    tokenizeT (Proxy @t)

-- ** Class 'TokenizeT'
class TokenizeT meta ts t where
  tokenizeT :: Proxy t -> Tokenizers meta ts
  -- tokenizeT _t = [] `Mod` []
  tokenizeT _t = mempty

tokenizeTMod
  :: Mod_Path
  -> [(Term_Name, Term_ProTok fix meta ts)]
  -> Map Mod_Path (Map Term_Name (Term_ProTok fix meta ts))
tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl

tokenize0
  :: Inj_Token meta ts t
  => Text -> fixity -> TokenT meta ts (Proxy t)
  -> (Term_Name, Term_ProTok fixity meta ts)
tokenize0 n term_fixity tok =
  (Term_Name n,) Term_ProTok
    { term_protok = \meta -> ProTok $ inj_EToken meta $ tok
    , term_fixity }

tokenize1
  :: Inj_Token meta ts t
  => Text -> fixity
  -> (EToken meta ts -> TokenT meta ts (Proxy t))
  -> (Term_Name, Term_ProTok fixity meta ts)
tokenize1 n term_fixity tok =
  (Term_Name n,) Term_ProTok
    { term_protok = \meta ->
        ProTokLam $ \a ->
          ProTok $ inj_EToken meta $ tok a
    , term_fixity }

tokenize2
  :: Inj_Token meta ts t
  => Text -> fixity
  -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
  -> (Term_Name, Term_ProTok fixity meta ts)
tokenize2 n term_fixity tok =
  (Term_Name n,) Term_ProTok
    { term_protok = \meta ->
        ProTokLam $ \a -> ProTokLam $ \b ->
          ProTok $ inj_EToken meta $ tok a b
    , term_fixity
    }

tokenize3
  :: Inj_Token meta ts t
  => Text -> fixity
  -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
  -> (Term_Name, Term_ProTok fixity meta ts)
tokenize3 n term_fixity tok =
  (Term_Name n,) Term_ProTok
    { term_protok = \meta ->
        ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
          ProTok $ inj_EToken meta $ tok a b c
    , term_fixity
    }

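-- NOTE: a sketch of how the @tokenizeN@ helpers are meant to be used
-- within a 'TokenizeT' instance; the token type @Foo@, its constructor
-- @Token_Term_Foo_add@ and the module name @Foo@ are hypothetical:
--
-- @
-- instance Inj_Token meta ts Foo => TokenizeT meta ts (Proxy Foo) where
--   tokenizeT _t = mempty
--     { tokenizers_infix = tokenizeTMod [Mod_Name (Text.pack "Foo")]
--         [ tokenize2 "+" infixN5 Token_Term_Foo_add ]
--     }
-- @
--
-- 'tokenizers' then gathers such instances for every token type in @ts@
-- through 'TokenizeR'.
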
-- * Type 'Mod'
type Mod_Path = [Mod_Name]
newtype Mod_Name = Mod_Name Text
  deriving (Eq, Ord, Show)
data Mod a = Mod Mod_Path a
  deriving (Eq, Functor, Ord, Show)

-- * Class 'Gram_Term_Name'
class
  ( Alt g
  , Alter g
  , App g
  , Try g
  , Gram_CF g
  , Gram_Op g
  , Gram_Lexer g
  , Gram_RegL g
  , Gram_Rule g
  , Gram_Terminal g
  ) => Gram_Term_Name g where
  mod_path :: CF g Mod_Path
  mod_path = rule "mod_path" $
    infixrG
      (pure <$> mod_name)
      (op <$ char '.')
    where op = (<>)
  mod_name :: CF g Mod_Name
  mod_name = rule "mod_name" $
    (Mod_Name . Text.pack <$>) $
    identG `minus`
    (Fun.const
      <$> term_keywords
      <*. (any `but` term_idname_tail))
    where
      identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
      headG = unicat $ Unicat Char.UppercaseLetter

  term_mod_name :: CF g (Mod Term_Name)
  term_mod_name = rule "term_mod_name" $
    lexeme $
      term_mod_idname <+>
      parens term_mod_opname
  term_name :: CF g Term_Name
  term_name = rule "term_name" $
    lexeme $
      term_idname <+>
      parens term_opname

  term_mod_idname :: CF g (Mod Term_Name)
  term_mod_idname = rule "term_mod_idname" $
    Mod
      <$> option [] (try $ mod_path <* char '.')
      <*> term_idname
  term_idname :: CF g Term_Name
  term_idname = rule "term_idname" $
    (Term_Name . Text.pack <$>) $
    (identG `minus`) $
    Fun.const
      <$> term_keywords
      <*. (any `but` term_idname_tail)
    where
      identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
      headG = unicat $ Unicat_Letter
  term_idname_tail :: Terminal g Char
  term_idname_tail = rule "term_idname_tail" $
    unicat Unicat_Letter <+>
    unicat Unicat_Number
  term_keywords :: Reg rl g String
  term_keywords = rule "term_keywords" $
    choice $ string <$> ["in", "let"]

  term_mod_opname :: CF g (Mod Term_Name)
  term_mod_opname = rule "term_mod_opname" $
    Mod
      <$> option [] (try $ mod_path <* char '.')
      <*> term_opname
  term_opname :: CF g Term_Name
  term_opname = rule "term_opname" $
    (Term_Name . Text.pack <$>) $
    (symG `minus`) $
    Fun.const
      <$> term_keysyms
      <*. (any `but` term_opname_ok)
    where
      symG = some $ cf_of_Terminal term_opname_ok
  term_opname_ok :: Terminal g Char
  term_opname_ok = rule "term_opname_ok" $
    choice (unicat <$>
      [ Unicat_Symbol
      , Unicat_Punctuation
      , Unicat_Mark
      ]) `but` koG
    where
      koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
  term_keysyms :: Reg rl g String
  term_keysyms = rule "term_keysyms" $
    choice $ string <$> ["\\", "->", "="]

deriving instance Gram_Term_Name g => Gram_Term_Name (CF g)
instance Gram_Term_Name EBNF
instance Gram_Term_Name RuleDef

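-- NOTE: informally, the lexical grammar above accepts shapes such as
-- the following (examples, not an exhaustive description):
--
-- * 'mod_path'        : Foo.Bar (dot-separated 'mod_name's)
-- * 'mod_name'        : Foo (an identifier starting with an uppercase letter)
-- * 'term_idname'     : foo, fooBar1 (a letter followed by letters or digits,
--                       excluding the keywords "let" and "in")
-- * 'term_opname'     : +, >>= (symbol, punctuation or mark characters,
--                       excluding the key-symbols "\", "->" and "=")
-- * 'term_mod_idname' : Foo.Bar.baz (an identifier qualified by an optional 'mod_path')
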
-- * Class 'Gram_Term_Type'
class
  ( Alt g
  , Alter g
  , App g
  , Gram_CF g
  , Gram_Lexer g
  , Gram_Meta meta g
  , Gram_Rule g
  , Gram_Terminal g
  , Gram_Term_Name g
  , Gram_Type meta g
  ) => Gram_Term_Type meta g where
  term_abst_decl
    :: CF g (Term_Name, TokType meta)
  term_abst_decl = rule "term_abst_decl" $
    parens $ (,)
      <$> term_name
      <* symbol ":"
      <*> typeG

deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
instance Gram_Term_Type meta EBNF
instance Gram_Term_Type meta RuleDef

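-- NOTE: 'term_abst_decl' parses a parenthesized, type-annotated binder
-- such as @(x : Int)@, yielding the 'Term_Name' @x@ together with the
-- 'TokType' parsed by 'typeG' (assuming a type named @Int@ is known to
-- the 'Gram_Type' grammar).
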
-- * Class 'Gram_Error'
class Gram_Error g where
  term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
  term_unError (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
  term_unError (CF (RuleDef (EBNF g))) =
    CF $ RuleDef $ EBNF g

-- ** Type 'Error_Term_Gram'
data Error_Term_Gram
  = Error_Term_Gram_Fixity Error_Fixity
  | Error_Term_Gram_Cannot_apply_term
  | Error_Term_Gram_Cannot_apply_type
  | Error_Term_Gram_Undefined_term
  | Error_Term_Gram_Term_incomplete
  deriving (Eq, Show)

-- * Class 'Gram_Term'
class
  ( Alt g
  , Alter g
  , App g
  , Gram_CF g
  , Gram_Lexer g
  , Gram_Meta meta g
  , Gram_Rule g
  , Gram_Terminal g
  , Gram_Error g
  , Gram_Term_AtomsR meta ts ts g
  , Gram_Term_Name g
  , Gram_Term_Type meta g
  , Gram_Type meta g
  ) => Gram_Term ts meta g where
  -- | Wrap 'term_abst'. Useful to modify the body's scope.
  term_abst_args_body
    :: CF g [(Term_Name, TokType meta)]
    -> CF g (EToken meta ts)
    -> CF g ([(Term_Name, TokType meta)], EToken meta ts)
  term_abst_args_body args body = (,) <$> args <*> body
  term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a

  termG
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  termG = rule "term" $
    choice
      [ try term_abst
      , term_operators
      , term_let
      ]
  term_operators
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  term_operators = rule "term_operators" $
    term_unError $
    term_unError $
    left Error_Term_Gram_Fixity <$>
    operators
      (Right <$> term_app)
      (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix)
      (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix)
      (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix)
    where
      bqG :: Gram_Terminal g' => g' Char
      bqG = char '`'
      op_infix name toks meta = do
        (_pre, in_, _post) <- protok name toks
        return $
          (term_fixity in_,) $ \ma mb -> do
            a <- ma
            b <- mb
            unProTok =<< term_protok in_ meta `protok_app` [Right a, Right b]
      op_prefix name toks meta = do
        (pre, _in_, _post) <- protok name toks
        case pre of
          Just p ->
            Right $ (term_fixity p,) $ (=<<) $ \a ->
              unProTok =<< term_protok p meta `protok_app` [Right a]
          Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
      op_postfix name toks meta = do
        (_pre, _in_, post) <- protok name toks
        case post of
          Just p ->
            Right $ (term_fixity p,) $ (=<<) $ \a ->
              unProTok =<< term_protok p meta `protok_app` [Right a]
          Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
      term_op_postfix :: CF g (Mod Term_Name)
      term_op_postfix = rule "term_op_postfix" $
        lexeme $
          bqG *> term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` bqG)
          term_mod_opname
      term_op_infix :: CF g (Mod Term_Name)
      term_op_infix = rule "term_op_infix" $
        lexeme $
          between bqG bqG term_mod_idname <+>
          term_mod_opname
      term_op_prefix :: CF g (Mod Term_Name)
      term_op_prefix = rule "term_op_prefix" $
        lexeme $
          term_mod_idname <* bqG <+>
          term_mod_opname
  term_app
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  term_app = rule "term_app" $
    term_unError $
    (\a as -> unProTok =<< protok_app a as)
      <$> term_atom_proto
      <*> many (try term_atom)
  term_atom
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (Either (EToken meta '[Proxy Token_Type])
                    (EToken meta ts))
  term_atom = rule "term_atom" $
    (Left <$ char '@' <*> typeG) <+>
    (Right <$> term_unError (unProTok <$> term_atom_proto))
  term_atom_proto
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (ProTok meta ts)
  term_atom_proto =
    choice $
      try <$> term_atomsR (Proxy @ts) <>
      [ try $
          metaG $ ((\(_, in_, _) -> term_protok in_) <$>) $
          term_unError $
          term_tokenizers $
          protok <$> term_mod_name
      , ProTok <$> term_group
      ]
  term_group
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  term_group = rule "term_group" $ parens termG
  term_abst
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  term_abst = rule "term_abst" $
    metaG $
    ((\(xs, te) meta ->
      foldr (\(x, ty_x) ->
        inj_EToken meta .
        Token_Term_Abst x ty_x) te xs) <$>) $
    term_abst_args_body
      (symbol "\\" *> some term_abst_decl <* symbol "->")
      termG
  term_let
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  term_let = rule "term_let" $
    metaG $
    (\name args bound body meta ->
      inj_EToken meta $
      Token_Term_Let name
        (foldr (\(x, ty_x) ->
          inj_EToken meta . Token_Term_Abst x ty_x) bound args) body)
      <$ symbol "let"
      <*> term_name
      <*> many term_abst_decl
      <* symbol "="
      <*> termG
      <* symbol "in"
      <*> termG

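-- NOTE: informally, the grammar above parses concrete syntax such as
-- the following (examples, assuming the needed tokens and atoms are in @ts@):
--
-- * 'term_abst'      : \(x : Int) -> x
-- * 'term_let'       : let f (x : Int) = x in f 42
-- * 'term_app'       : f x y, with a type argument passed as: f @Int x
-- * 'term_operators' : x + y, or a backquoted infix identifier: x `plus` y
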
deriving instance
  ( Gram_Term ts meta g
  , Gram_Term_AtomsR meta ts ts (CF g)
  ) => Gram_Term ts meta (CF g)
instance
  Gram_Term_AtomsR meta ts ts EBNF =>
  Gram_Term ts meta EBNF where
  term_tokenizers (CF (EBNF g)) = CF $ EBNF g
instance
  Gram_Term_AtomsR meta ts ts RuleDef =>
  Gram_Term ts meta RuleDef where
  term_tokenizers (CF (RuleDef (EBNF g))) =
    CF $ RuleDef $ EBNF g

-- ** Class 'Gram_Term_AtomsR'
class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
  term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
instance Gram_Term_AtomsR meta ts '[] g where
  term_atomsR _rs = []
instance
  ( Gram_Term_AtomsT meta ts t g
  , Gram_Term_AtomsR meta ts rs g
  ) => Gram_Term_AtomsR meta ts (t ': rs) g where
  term_atomsR _ =
    term_atomsT (Proxy @t) <>
    term_atomsR (Proxy @rs)

-- ** Class 'Gram_Term_AtomsT'
class Gram_Term_AtomsT meta ts t g where
  term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
  term_atomsT _t = []
instance Gram_Term_AtomsT meta ts t RuleDef

gram_term
  :: forall g.
  ( Gram_Term '[Proxy (->), Proxy Integer] () g
  ) => [CF g ()]
gram_term =
  [ ue termG
  , ue term_operators
  , ue term_app
  , ug term_atom
  , ue term_group
  , ue term_abst
  , void (term_abst_decl::CF g (Term_Name, TokType ()))
  , ue term_let
  , void term_mod_name
  , void term_name
  , void term_idname
  , void $ cf_of_Terminal term_idname_tail
  , void $ cf_of_Reg term_keywords
  , void term_mod_opname
  , void term_opname
  , void $ cf_of_Terminal term_opname_ok
  , void $ cf_of_Reg term_keysyms
  ] where
    ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
    ue = (() <$)
    -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
    -- uf = (() <$)
    ug :: CF g (Either (EToken () '[Proxy Token_Type])
                       (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()
    ug = (() <$)