]> Git — Sourcephile - haskell/symantic.git/blob - Language/Symantic/Compiling/Term/Grammar.hs
Remove Alternative uses in grammars.
[haskell/symantic.git] / Language / Symantic / Compiling / Term / Grammar.hs
1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.Symantic.Compiling.Term.Grammar where
9
10 import Control.Arrow (left)
11 import Control.Monad (foldM, void, (=<<))
12 import qualified Data.Char as Char
13 import qualified Data.Function as Fun
14 import qualified Data.List as List
15 import Data.Map.Strict (Map)
16 import qualified Data.Map.Strict as Map
17 import Data.Monoid ((<>))
18 import Data.Proxy (Proxy(..))
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Prelude hiding (mod, not, any)
22
23 import Language.Symantic.Parsing
24 import Language.Symantic.Parsing.Grammar
25 import Language.Symantic.Parsing.EBNF
26 import Language.Symantic.Typing
27
28 -- * Type 'Term_Name'
29 newtype Term_Name = Term_Name Text
30 deriving (Eq, Ord, Show)
31
32 -- * Type 'ProTok'
33 -- | Proto 'EToken'. It's almost like a free monad,
34 -- but has a third constructor ('ProTokPi')
35 -- to require a type argument.
36 --
37 -- NOTE: this type may one day be removed
38 -- if proper type inferencing is done.
39 -- In the meantime it is used to require
40 -- term or type arguments needed to build
41 -- the 'EToken's of polymorphic terms.
42 data ProTok meta ts
43 = ProTokLam (EToken meta ts -> ProTok meta ts)
44 -- ^ Require a term argument.
45 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
46 -- ^ Require a type argument.
47 | ProTok (EToken meta ts)
48 -- ^ No need for any argument.
49
50 -- | Declared here and not in @Compiling.Lambda@
51 -- to be able to use 'Token_Term_Var' in 'protok'.
52 data instance TokenT meta (ts::[*]) (Proxy (->))
53 = Token_Term_Abst Term_Name (EToken meta '[Proxy Token_Type]) (EToken meta ts)
54 | Token_Term_App (EToken meta ts) (EToken meta ts)
55 | Token_Term_Let Term_Name (EToken meta ts) (EToken meta ts)
56 | Token_Term_Var Term_Name
57 | Token_Term_Compose (EToken meta ts) (EToken meta ts)
58
59 -- * Class 'Tokenize'
60 type Tokenize meta ts
61 = TokenizeR meta ts ts
62
63 -- ** Type 'Tokenizers'
64 data Tokenizers meta ts
65 = Tokenizers
66 { tokenizers_prefix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
67 , tokenizers_infix :: Map Mod_Path (Map Term_Name (Term_ProTok Infix meta ts))
68 , tokenizers_postfix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
69 }
70 instance Monoid (Tokenizers meta ts) where
71 mempty = Tokenizers Map.empty Map.empty Map.empty
72 mappend x y =
73 Tokenizers
74 (Map.unionWith Map.union
75 (tokenizers_prefix x)
76 (tokenizers_prefix y))
77 (Map.unionWith Map.union
78 (tokenizers_infix x)
79 (tokenizers_infix y))
80 (Map.unionWith Map.union
81 (tokenizers_postfix x)
82 (tokenizers_postfix y))
83
84 data Term_ProTok fixy meta ts
85 = Term_ProTok
86 { term_protok :: meta -> ProTok meta ts
87 , term_fixity :: fixy
88 }
89
90 tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
91 tokenizers = tokenizeR (Proxy @ts)
92
93 unProTok
94 :: ProTok meta ts
95 -> Either Error_Term_Gram (EToken meta ts)
96 unProTok (ProTok t) = Right t
97 unProTok _ = Left Error_Term_Gram_Term_incomplete
98
99 protok
100 :: Inj_Token meta ts (->)
101 => Mod Term_Name
102 -> Tokenizers meta ts
103 -> Either Error_Term_Gram
104 ( Maybe (Term_ProTok Unifix meta ts)
105 , Term_ProTok Infix meta ts
106 , Maybe (Term_ProTok Unifix meta ts)
107 )
108 protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
109 let pre = Map.lookup mod pres >>= Map.lookup tn
110 let post = Map.lookup mod posts >>= Map.lookup tn
111 in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
112 return (pre, in_, post)
113 where
114 var_or_err (Just x) = Right x
115 var_or_err Nothing =
116 case mod of
117 [] -> Right (var infixN5)
118 _ -> Left $ Error_Term_Gram_Undefined_term
119 var term_fixity =
120 Term_ProTok
121 { term_protok = \meta -> ProTok $ inj_etoken meta $ Token_Term_Var tn
122 , term_fixity
123 }
124
125 protok_app
126 :: Inj_Token meta ts (->)
127 => ProTok meta ts
128 -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
129 -> Either Error_Term_Gram (ProTok meta ts)
130 protok_app =
131 foldM app
132 where
133 app acc (Left typ) =
134 case acc of
135 ProTokPi g -> Right $ g typ
136 _ -> Left Error_Term_Gram_Cannot_apply_type
137 app acc (Right te) =
138 case acc of
139 ProTokLam f -> Right $ f te
140 ProTok tok@(EToken e) -> Right $
141 ProTok $ inj_etoken (meta_of e) $
142 Token_Term_App tok te
143 _ -> Left Error_Term_Gram_Cannot_apply_term
144
145 -- ** Class 'TokenizeR'
146 class TokenizeR meta (ts::[*]) (rs::[*]) where
147 tokenizeR :: Proxy rs -> Tokenizers meta ts
148 instance TokenizeR meta ts '[] where
149 tokenizeR _rs = mempty
150 instance
151 ( TokenizeT meta ts t
152 , TokenizeR meta ts rs
153 ) => TokenizeR meta ts (t ': rs) where
154 tokenizeR _ =
155 tokenizeR (Proxy @rs) `mappend`
156 tokenizeT (Proxy @t)
157
158 -- ** Class 'TokenizeT'
159 class TokenizeT meta ts t where
160 tokenizeT :: Proxy t -> Tokenizers meta ts
161 -- tokenizeT _t = [] `Mod` []
162 tokenizeT _t = mempty
163
164 tokenizeTMod
165 :: Mod_Path
166 -> [(Term_Name, Term_ProTok fix meta ts)]
167 -> Map Mod_Path (Map Term_Name (Term_ProTok fix meta ts))
168 tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl
169
170 tokenize0
171 :: Inj_Token meta ts t
172 => Text -> fixity -> TokenT meta ts (Proxy t)
173 -> (Term_Name, Term_ProTok fixity meta ts)
174 tokenize0 n term_fixity tok =
175 (Term_Name n,) Term_ProTok
176 { term_protok = \meta -> ProTok $ inj_etoken meta $ tok
177 , term_fixity }
178
179 tokenize1
180 :: Inj_Token meta ts t
181 => Text -> fixity
182 -> (EToken meta ts -> TokenT meta ts (Proxy t))
183 -> (Term_Name, Term_ProTok fixity meta ts)
184 tokenize1 n term_fixity tok =
185 (Term_Name n,) Term_ProTok
186 { term_protok = \meta ->
187 ProTokLam $ \a ->
188 ProTok $ inj_etoken meta $ tok a
189 , term_fixity }
190
191 tokenize2
192 :: Inj_Token meta ts t
193 => Text -> fixity
194 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
195 -> (Term_Name, Term_ProTok fixity meta ts)
196 tokenize2 n term_fixity tok =
197 (Term_Name n,) Term_ProTok
198 { term_protok = \meta ->
199 ProTokLam $ \a -> ProTokLam $ \b ->
200 ProTok $ inj_etoken meta $ tok a b
201 , term_fixity
202 }
203
204 tokenize3
205 :: Inj_Token meta ts t
206 => Text -> fixity
207 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
208 -> (Term_Name, Term_ProTok fixity meta ts)
209 tokenize3 n term_fixity tok =
210 (Term_Name n,) Term_ProTok
211 { term_protok = \meta ->
212 ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
213 ProTok $ inj_etoken meta $ tok a b c
214 , term_fixity
215 }
216
217 -- * Type 'Mod'
218 type Mod_Path = [Mod_Name]
219 newtype Mod_Name = Mod_Name Text
220 deriving (Eq, Ord, Show)
221 data Mod a = Mod Mod_Path a
222 deriving (Eq, Functor, Ord, Show)
223
224 -- * Class 'Gram_Term_Name'
225 class
226 ( Alt g
227 , Alter g
228 , Alter g
229 , App g
230 , Gram_CF g
231 , Gram_Lexer g
232 , Gram_RegL g
233 , Gram_Rule g
234 , Gram_Terminal g
235 ) => Gram_Term_Name g where
236 mod_path :: CF g Mod_Path
237 mod_path = rule "mod_path" $
238 infixrG
239 (pure <$> mod_name)
240 (op <$ char '.')
241 where op = mappend
242 mod_name :: CF g Mod_Name
243 mod_name = rule "mod_name" $
244 (Mod_Name . Text.pack <$>) $
245 identG `minus`
246 (Fun.const
247 <$> term_keywords
248 <*. (any `but` term_idname_tail))
249 where
250 identG = (:) <$> headG <*> many (cf_of_term term_idname_tail)
251 headG = unicat $ Unicat Char.UppercaseLetter
252
253 term_mod_name :: CF g (Mod Term_Name)
254 term_mod_name = rule "term_mod_name" $
255 lexeme $
256 term_mod_idname <+>
257 parens term_mod_opname
258 term_name :: CF g Term_Name
259 term_name = rule "term_name" $
260 lexeme $
261 term_idname <+>
262 parens term_opname
263
264 term_mod_idname :: CF g (Mod Term_Name)
265 term_mod_idname = rule "term_mod_idname" $
266 Mod
267 <$> option [] (mod_path <* char '.')
268 <*> term_idname
269 term_idname :: CF g Term_Name
270 term_idname = rule "term_idname" $
271 (Term_Name . Text.pack <$>) $
272 (identG `minus`) $
273 Fun.const
274 <$> term_keywords
275 <*. (any `but` term_idname_tail)
276 where
277 identG = (:) <$> headG <*> many (cf_of_term term_idname_tail)
278 headG = unicat $ Unicat_Letter
279 term_idname_tail :: Terminal g Char
280 term_idname_tail = rule "term_idname_tail" $
281 unicat Unicat_Letter <+>
282 unicat Unicat_Number
283 term_keywords :: Reg rl g String
284 term_keywords = rule "term_keywords" $
285 choice $ string <$> ["in", "let"]
286
287 term_mod_opname :: CF g (Mod Term_Name)
288 term_mod_opname = rule "term_mod_opname" $
289 Mod
290 <$> option [] (mod_path <* char '.')
291 <*> term_opname
292 term_opname :: CF g Term_Name
293 term_opname = rule "term_opname" $
294 (Term_Name . Text.pack <$>) $
295 (symG `minus`) $
296 Fun.const
297 <$> term_keysyms
298 <*. (any `but` term_opname_ok)
299 where
300 symG = some $ cf_of_term $ term_opname_ok
301 term_opname_ok :: Terminal g Char
302 term_opname_ok = rule "term_opname_ok" $
303 choice (unicat <$>
304 [ Unicat_Symbol
305 , Unicat_Punctuation
306 , Unicat_Mark
307 ]) `but` koG
308 where
309 koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
310 term_keysyms :: Reg rl g String
311 term_keysyms = rule "term_keysyms" $
312 choice $ string <$> ["\\", "->", "="]
313
314 deriving instance Gram_Term_Name g => Gram_Term_Name (CF g)
315 instance Gram_Term_Name EBNF
316 instance Gram_Term_Name RuleDef
317
318 -- * Class 'Gram_Term_Type'
319 class
320 ( Alt g
321 , Alter g
322 , App g
323 , Gram_CF g
324 , Gram_Lexer g
325 , Gram_Meta meta g
326 , Gram_Rule g
327 , Gram_Terminal g
328 , Gram_Term_Name g
329 , Gram_Type meta g
330 ) => Gram_Term_Type meta g where
331 term_abst_decl
332 :: CF g (Term_Name, TokType meta)
333 term_abst_decl = rule "term_abst_decl" $
334 parens $ (,)
335 <$> term_name
336 <* symbol ":"
337 <*> typeG
338
339 deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
340 instance Gram_Term_Type meta EBNF
341 instance Gram_Term_Type meta RuleDef
342
343 -- * Class 'Gram_Error'
344 class Gram_Error g where
345 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
346 deriving instance Gram_Error g => Gram_Error (CF g)
347 instance Gram_Error EBNF where
348 term_unError (CF (EBNF g)) = CF $ EBNF g
349 instance Gram_Error RuleDef where
350 term_unError (CF (RuleDef (EBNF g))) =
351 CF $ RuleDef $ EBNF $ g
352
353 -- ** Type 'Error_Term_Gram'
354 data Error_Term_Gram
355 = Error_Term_Gram_Fixity Error_Fixity
356 | Error_Term_Gram_Cannot_apply_term
357 | Error_Term_Gram_Cannot_apply_type
358 | Error_Term_Gram_Undefined_term
359 | Error_Term_Gram_Term_incomplete
360 deriving (Eq, Show)
361
362 -- * Class 'Gram_Term'
363 class
364 ( Alt g
365 , Alter g
366 , App g
367 , Gram_CF g
368 , Gram_Lexer g
369 , Gram_Meta meta g
370 , Gram_Rule g
371 , Gram_Terminal g
372 , Gram_Error g
373 , Gram_Term_AtomsR meta ts ts g
374 , Gram_Term_Name g
375 , Gram_Term_Type meta g
376 , Gram_Type meta g
377 ) => Gram_Term ts meta g where
378 -- | Wrap 'term_abst'. Useful to modify body's scope.
379 term_abst_args_body
380 :: CF g [(Term_Name, TokType meta)]
381 -> CF g (EToken meta ts)
382 -> CF g ([(Term_Name, TokType meta)], EToken meta ts)
383 term_abst_args_body args body = (,) <$> args <*> body
384 term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a
385
386 termG
387 :: Inj_Tokens meta ts '[Proxy (->)]
388 => CF g (EToken meta ts)
389 termG = rule "term" $
390 choice $
391 [ term_abst
392 , term_let
393 , term_operators
394 ]
395 term_operators
396 :: Inj_Tokens meta ts '[Proxy (->)]
397 => CF g (EToken meta ts)
398 term_operators = rule "term_operators" $
399 term_unError $
400 term_unError $
401 left Error_Term_Gram_Fixity <$>
402 operators
403 (Right <$> term_app)
404 (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix)
405 (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix)
406 (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix)
407 where
408 bqG :: Gram_Terminal g' => g' Char
409 bqG = char '`'
410 op_infix name toks meta = do
411 (_pre, in_, _post) <- protok name toks
412 return $
413 (term_fixity in_,) $ \ma mb -> do
414 a <- ma
415 b <- mb
416 unProTok =<< term_protok in_ meta `protok_app` [Right a, Right b]
417 op_prefix name toks meta = do
418 (pre, _in_, _post) <- protok name toks
419 case pre of
420 Just p ->
421 Right $ (term_fixity p,) $ (=<<) $ \a ->
422 unProTok =<< term_protok p meta `protok_app` [Right a]
423 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
424 op_postfix name toks meta = do
425 (_pre, _in_, post) <- protok name toks
426 case post of
427 Just p ->
428 Right $ (term_fixity p,) $ (=<<) $ \a ->
429 unProTok =<< term_protok p meta `protok_app` [Right a]
430 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
431 term_op_postfix :: CF g (Mod Term_Name)
432 term_op_postfix = rule "term_op_postfix" $
433 lexeme $
434 bqG *> term_mod_idname <+> -- <* (cf_of_term $ Gram.Term (pure ' ') `but` bqG)
435 term_mod_opname
436 term_op_infix :: CF g (Mod Term_Name)
437 term_op_infix = rule "term_op_infix" $
438 lexeme $
439 between bqG bqG term_mod_idname <+>
440 term_mod_opname
441 term_op_prefix :: CF g (Mod Term_Name)
442 term_op_prefix = rule "term_op_prefix" $
443 lexeme $
444 term_mod_idname <* bqG <+>
445 term_mod_opname
446 term_app
447 :: Inj_Tokens meta ts '[Proxy (->)]
448 => CF g (EToken meta ts)
449 term_app = rule "term_app" $
450 term_unError $
451 (\a as -> unProTok =<< protok_app a as)
452 <$> term_atom_proto
453 <*> many term_atom
454 term_atom
455 :: Inj_Tokens meta ts '[Proxy (->)]
456 => CF g (Either (EToken meta '[Proxy Token_Type])
457 (EToken meta ts))
458 term_atom = rule "term_atom" $
459 (Left <$ char '@' <*> typeG) <+>
460 (Right <$> term_unError (unProTok <$> term_atom_proto))
461 term_atom_proto
462 :: Inj_Tokens meta ts '[Proxy (->)]
463 => CF g (ProTok meta ts)
464 term_atom_proto =
465 choice $
466 term_atomsR (Proxy @ts) <>
467 [ metaG $ ((\(_, in_, _) -> term_protok in_) <$>) $ term_unError $ term_tokenizers $
468 protok <$> term_mod_name
469 , ProTok <$> term_group
470 ]
471 term_group
472 :: Inj_Tokens meta ts '[Proxy (->)]
473 => CF g (EToken meta ts)
474 term_group = rule "term_group" $ parens termG
475 term_abst
476 :: Inj_Tokens meta ts '[Proxy (->)]
477 => CF g (EToken meta ts)
478 term_abst = rule "term_abst" $
479 metaG $
480 ((\(xs, te) meta ->
481 List.foldr (\(x, ty_x) ->
482 inj_etoken meta .
483 Token_Term_Abst x ty_x) te xs) <$>) $
484 term_abst_args_body
485 (symbol "\\" *> some term_abst_decl <* symbol "->")
486 termG
487 term_let
488 :: Inj_Tokens meta ts '[Proxy (->)]
489 => CF g (EToken meta ts)
490 term_let = rule "term_let" $
491 metaG $
492 (\name args bound body meta ->
493 inj_etoken meta $
494 Token_Term_Let name
495 (List.foldr
496 (\(x, ty_x) -> inj_etoken meta . Token_Term_Abst x ty_x) bound args
497 ) body)
498 <$ symbol "let"
499 <*> term_name
500 <*> many term_abst_decl
501 <* symbol "="
502 <*> termG
503 <* symbol "in"
504 <*> termG
505
506 deriving instance
507 ( Gram_Term ts meta g
508 , Gram_Term_AtomsR meta ts ts (CF g)
509 ) => Gram_Term ts meta (CF g)
510 instance
511 Gram_Term_AtomsR meta ts ts EBNF =>
512 Gram_Term ts meta EBNF where
513 term_tokenizers (CF (EBNF g)) = CF $ EBNF g
514 instance
515 Gram_Term_AtomsR meta ts ts RuleDef =>
516 Gram_Term ts meta RuleDef where
517 term_tokenizers (CF (RuleDef (EBNF g))) =
518 CF $ RuleDef $ EBNF $ g
519
520 -- ** Class 'Gram_Term_AtomsR'
521 class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
522 term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
523 instance Gram_Term_AtomsR meta ts '[] g where
524 term_atomsR _rs = []
525 instance
526 ( Gram_Term_AtomsT meta ts t g
527 , Gram_Term_AtomsR meta ts rs g
528 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
529 term_atomsR _ =
530 term_atomsT (Proxy @t) <>
531 term_atomsR (Proxy @rs)
532
533 -- ** Class 'Gram_Term_AtomsT'
534 class Gram_Term_AtomsT meta ts t g where
535 term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
536 term_atomsT _t = []
537 instance Gram_Term_AtomsT meta ts t RuleDef
538
539 gram_term
540 :: forall g.
541 ( Gram_Term '[Proxy (->), Proxy Integer] () g
542 ) => [CF g ()]
543 gram_term =
544 [ ue termG
545 , ue term_operators
546 , ue term_app
547 , ug term_atom
548 , ue term_group
549 , ue term_abst
550 , void (term_abst_decl::CF g (Term_Name, TokType ()))
551 , ue term_let
552 , void term_mod_name
553 , void term_name
554 , void term_idname
555 , void $ cf_of_term term_idname_tail
556 , void $ cf_of_reg term_keywords
557 , void term_mod_opname
558 , void term_opname
559 , void $ cf_of_term term_opname_ok
560 , void $ cf_of_reg term_keysyms
561 ] where
562 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
563 ue = (() <$)
564 -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
565 -- uf = (() <$)
566 ug :: CF g (Either (EToken () '[Proxy Token_Type])
567 (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()
568 ug = (() <$)