{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.Symantic.Compiling.Term.Grammar where

import Control.Applicative (Alternative(..))
import Control.Arrow (left)
import Control.Monad (foldM, void, (=<<))
import qualified Data.Char as Char
import qualified Data.Function as Fun
import qualified Data.List as List
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Monoid ((<>))
import Data.Proxy (Proxy(..))
import Data.Text (Text)
import qualified Data.Text as Text
import Prelude hiding (mod, not, any)

import Language.Symantic.Parsing
import Language.Symantic.Parsing.Grammar
import Language.Symantic.Parsing.EBNF
import Language.Symantic.Typing

-- * Type 'Term_Name'
newtype Term_Name = Term_Name Text
 deriving (Eq, Ord, Show)

-- * Type 'ProTok'
-- | Proto 'EToken'. It is almost a free monad,
-- but has a third constructor ('ProTokPi')
-- to require a type argument.
--
-- NOTE: this type may one day be removed
-- if proper type inference is done.
-- In the meantime it is used to require
-- the term or type arguments needed to build
-- the 'EToken's of polymorphic terms.
data ProTok meta ts
 = ProTokLam (EToken meta ts -> ProTok meta ts)
   -- ^ Require a term argument.
 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
   -- ^ Require a type argument.
 | ProTok (EToken meta ts)
   -- ^ No need for any argument.

-- | Declared here rather than in @Compiling.Lambda@
-- so that 'Token_Term_Var' can be used in 'protok'.
data instance TokenT meta (ts::[*]) (Proxy (->))
 = Token_Term_Abst Term_Name (EToken meta '[Proxy Token_Type]) (EToken meta ts)
 | Token_Term_App (EToken meta ts) (EToken meta ts)
 | Token_Term_Let Term_Name (EToken meta ts) (EToken meta ts)
 | Token_Term_Var Term_Name
 | Token_Term_Compose (EToken meta ts) (EToken meta ts)

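-- | A minimal illustration (not part of the original API, only a sketch):
-- the 'ProTok' of a binary term constructor such as 'Token_Term_Compose'
-- asks for its two term arguments with nested 'ProTokLam's before yielding
-- a plain 'ProTok'. Compare with 'tokenize2' below, which builds exactly
-- this shape.
protok_compose_example
 :: Inj_Token meta ts (->)
 => meta -> ProTok meta ts
protok_compose_example meta =
 ProTokLam $ \f -> ProTokLam $ \g ->
  ProTok $ inj_etoken meta $ Token_Term_Compose f g
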
-- * Class 'Tokenize'
type Tokenize meta ts
 = TokenizeR meta ts ts

-- ** Type 'Tokenizers'
data Tokenizers meta ts
 = Tokenizers
 { tokenizers_prefix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
 , tokenizers_infix :: Map Mod_Path (Map Term_Name (Term_ProTok Infix meta ts))
 , tokenizers_postfix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
 }
instance Monoid (Tokenizers meta ts) where
 mempty = Tokenizers Map.empty Map.empty Map.empty
 mappend x y =
  Tokenizers
   (Map.unionWith Map.union
    (tokenizers_prefix x)
    (tokenizers_prefix y))
   (Map.unionWith Map.union
    (tokenizers_infix x)
    (tokenizers_infix y))
   (Map.unionWith Map.union
    (tokenizers_postfix x)
    (tokenizers_postfix y))

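-- NOTE: 'Map.unionWith' 'Map.union' merges the per-'Mod_Path' maps of both
-- operands; since 'Map.union' is left-biased, a 'Term_Name' defined on both
-- sides keeps the 'Term_ProTok' of the left operand of 'mappend'.
--
--   tokenizers_infix (tok_a `mappend` tok_b)
--     -- contains every infix entry of tok_a and tok_b,
--     -- preferring tok_a's entry on a name clash (names are hypothetical).
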
data Term_ProTok fixy meta ts
 = Term_ProTok
 { term_protok :: meta -> ProTok meta ts
 , term_fixity :: fixy
 }

tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
tokenizers = tokenizeR (Proxy @ts)

unProTok
 :: ProTok meta ts
 -> Either Error_Term_Gram (EToken meta ts)
unProTok (ProTok t) = Right t
unProTok _ = Left Error_Term_Gram_Term_incomplete

protok
 :: Inj_Token meta ts (->)
 => Mod Term_Name
 -> Tokenizers meta ts
 -> Either Error_Term_Gram
    ( Maybe (Term_ProTok Unifix meta ts)
    , Term_ProTok Infix meta ts
    , Maybe (Term_ProTok Unifix meta ts)
    )
protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
 let pre = Map.lookup mod pres >>= Map.lookup tn
 let post = Map.lookup mod posts >>= Map.lookup tn
 in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
 return (pre, in_, post)
 where
 var_or_err (Just x) = Right x
 var_or_err Nothing =
  case mod of
   [] -> Right (var infixN5)
   _ -> Left $ Error_Term_Gram_Undefined_term
 var term_fixity =
  Term_ProTok
   { term_protok = \meta -> ProTok $ inj_etoken meta $ Token_Term_Var tn
   , term_fixity
   }

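-- A small sketch of the lookup behaviour of 'protok' (module and term
-- names are hypothetical):
--
--   protok ([Mod_Name "Foo"] `Mod` Term_Name "bar") toks
--     -- Left Error_Term_Gram_Undefined_term when Foo.bar has no infix entry
--   protok ([] `Mod` Term_Name "bar") toks
--     -- falls back to Token_Term_Var "bar" at 'infixN5'
--     -- when the unqualified "bar" has no infix entry
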
protok_app
 :: Inj_Token meta ts (->)
 => ProTok meta ts
 -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
 -> Either Error_Term_Gram (ProTok meta ts)
protok_app =
 foldM app
 where
 app acc (Left typ) =
  case acc of
   ProTokPi g -> Right $ g typ
   _ -> Left Error_Term_Gram_Cannot_apply_type
 app acc (Right te) =
  case acc of
   ProTokLam f -> Right $ f te
   ProTok tok@(EToken e) -> Right $
    ProTok $ inj_etoken (meta_of e) $
     Token_Term_App tok te
   _ -> Left Error_Term_Gram_Cannot_apply_term

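-- | A minimal usage sketch (hypothetical helper, not part of the original
-- module): saturate a 'ProTok' with a single term argument, as the grammar
-- does for prefix and postfix operators below.
protok_app1
 :: Inj_Token meta ts (->)
 => ProTok meta ts
 -> EToken meta ts
 -> Either Error_Term_Gram (ProTok meta ts)
protok_app1 f a = protok_app f [Right a]
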
-- ** Class 'TokenizeR'
class TokenizeR meta (ts::[*]) (rs::[*]) where
 tokenizeR :: Proxy rs -> Tokenizers meta ts
instance TokenizeR meta ts '[] where
 tokenizeR _rs = mempty
instance
 ( TokenizeT meta ts t
 , TokenizeR meta ts rs
 ) => TokenizeR meta ts (t ': rs) where
 tokenizeR _ =
  tokenizeR (Proxy @rs) `mappend`
  tokenizeT (Proxy @t)

-- ** Class 'TokenizeT'
class TokenizeT meta ts t where
 tokenizeT :: Proxy t -> Tokenizers meta ts
 -- tokenizeT _t = [] `Mod` []
 tokenizeT _t = mempty

tokenizeTMod
 :: Mod_Path
 -> [(Term_Name, Term_ProTok fix meta ts)]
 -> Map Mod_Path (Map Term_Name (Term_ProTok fix meta ts))
tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl

tokenize0
 :: Inj_Token meta ts t
 => Text -> fixity -> TokenT meta ts (Proxy t)
 -> (Term_Name, Term_ProTok fixity meta ts)
tokenize0 n term_fixity tok =
 (Term_Name n,) Term_ProTok
  { term_protok = \meta -> ProTok $ inj_etoken meta $ tok
  , term_fixity }

tokenize1
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> TokenT meta ts (Proxy t))
 -> (Term_Name, Term_ProTok fixity meta ts)
tokenize1 n term_fixity tok =
 (Term_Name n,) Term_ProTok
  { term_protok = \meta ->
     ProTokLam $ \a ->
      ProTok $ inj_etoken meta $ tok a
  , term_fixity }

tokenize2
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
 -> (Term_Name, Term_ProTok fixity meta ts)
tokenize2 n term_fixity tok =
 (Term_Name n,) Term_ProTok
  { term_protok = \meta ->
     ProTokLam $ \a -> ProTokLam $ \b ->
      ProTok $ inj_etoken meta $ tok a b
  , term_fixity
  }

tokenize3
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
 -> (Term_Name, Term_ProTok fixity meta ts)
tokenize3 n term_fixity tok =
 (Term_Name n,) Term_ProTok
  { term_protok = \meta ->
     ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
      ProTok $ inj_etoken meta $ tok a b c
  , term_fixity
  }

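-- A sketch of how these helpers are meant to be used from a 'TokenizeT'
-- instance (the instance head, module path, name and fixity below are
-- hypothetical, chosen only for illustration):
--
-- @
-- instance Inj_Token meta ts (->) => TokenizeT meta ts (Proxy (->)) where
--  tokenizeT _t = mempty
--   { tokenizers_infix = tokenizeTMod []
--     [ tokenize2 "." infixN5 Token_Term_Compose ]
--   }
-- @
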
-- * Type 'Mod'
type Mod_Path = [Mod_Name]
newtype Mod_Name = Mod_Name Text
 deriving (Eq, Ord, Show)
data Mod a = Mod Mod_Path a
 deriving (Eq, Functor, Ord, Show)

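-- A 'Mod' pairs a name with the module path qualifying it; for instance
-- the qualified name Foo.Bar.not parsed by 'term_mod_name' below is
-- represented as:
--
--   Mod [Mod_Name "Foo", Mod_Name "Bar"] (Term_Name "not")
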
-- * Class 'Gram_Term_Name'
class
 ( Alternative g
 , Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_RegL g
 , Gram_Rule g
 , Gram_Terminal g
 ) => Gram_Term_Name g where
 mod_path :: CF g Mod_Path
 mod_path = rule "mod_path" $
  infixrG
   (pure <$> mod_name)
   (op <$ char '.')
  where op = mappend
 mod_name :: CF g Mod_Name
 mod_name = rule "mod_name" $
  (Mod_Name . Text.pack <$>) $
  identG `minus`
  (Fun.const
   <$> term_keywords
   <*. (any `but` term_idname_tail))
  where
  identG = (:) <$> headG <*> many (cf_of_term term_idname_tail)
  headG = unicat $ Unicat Char.UppercaseLetter

 term_mod_name :: CF g (Mod Term_Name)
 term_mod_name = rule "term_mod_name" $
  lexeme $
   term_mod_idname <+>
   parens term_mod_opname
 term_name :: CF g Term_Name
 term_name = rule "term_name" $
  lexeme $
   term_idname <+>
   parens term_opname

 term_mod_idname :: CF g (Mod Term_Name)
 term_mod_idname = rule "term_mod_idname" $
  Mod
   <$> option [] (mod_path <* char '.')
   <*> term_idname
 term_idname :: CF g Term_Name
 term_idname = rule "term_idname" $
  (Term_Name . Text.pack <$>) $
  (identG `minus`) $
  Fun.const
   <$> term_keywords
   <*. (any `but` term_idname_tail)
  where
  identG = (:) <$> headG <*> many (cf_of_term term_idname_tail)
  headG = unicat $ Unicat_Letter
 term_idname_tail :: Terminal g Char
 term_idname_tail = rule "term_idname_tail" $
  unicat Unicat_Letter <+>
  unicat Unicat_Number
 term_keywords :: Reg rl g String
 term_keywords = rule "term_keywords" $
  choice $ string <$> ["in", "let"]

 term_mod_opname :: CF g (Mod Term_Name)
 term_mod_opname = rule "term_mod_opname" $
  Mod
   <$> option [] (mod_path <* char '.')
   <*> term_opname
 term_opname :: CF g Term_Name
 term_opname = rule "term_opname" $
  (Term_Name . Text.pack <$>) $
  (symG `minus`) $
  Fun.const
   <$> term_keysyms
   <*. (any `but` term_opname_ok)
  where
  symG = some $ cf_of_term $ term_opname_ok
 term_opname_ok :: Terminal g Char
 term_opname_ok = rule "term_opname_ok" $
  choice (unicat <$>
   [ Unicat_Symbol
   , Unicat_Punctuation
   , Unicat_Mark
   ]) `but` koG
  where
  koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
 term_keysyms :: Reg rl g String
 term_keysyms = rule "term_keysyms" $
  choice $ string <$> ["\\", "->", "="]

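-- Examples of names accepted by the rules above (purely illustrative):
--
--   mod_path       Foo.Bar                    dot-separated, uppercase-initial names
--   term_idname    not, x1                    letters then letters/digits, excluding "in" and "let"
--   term_opname    +, >>=, .                  symbol/punctuation/mark runs, excluding "\\", "->" and "="
--   term_mod_name  Foo.Bar.not, (Foo.+), (+)  qualified identifier, or operator in parentheses
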
deriving instance Gram_Term_Name g => Gram_Term_Name (CF g)
instance Gram_Term_Name EBNF
instance Gram_Term_Name RuleDef

-- * Class 'Gram_Term_Type'
class
 ( Alternative g
 , Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta meta g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Term_Name g
 , Gram_Type meta g
 ) => Gram_Term_Type meta g where
 term_abst_decl
  :: CF g (Term_Name, TokType meta)
 term_abst_decl = rule "term_abst_decl" $
  parens $ (,)
   <$> term_name
   <* symbol ":"
   <*> typeG

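-- 'term_abst_decl' parses one parenthesised, type-annotated binder,
-- e.g. (x : t) where t is any type accepted by 'typeG';
-- 'term_abst' and 'term_let' below take a sequence of such binders.
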
deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
instance Gram_Term_Type meta EBNF
instance Gram_Term_Type meta RuleDef

-- * Class 'Gram_Error'
class Gram_Error g where
 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
 term_unError (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
 term_unError (CF (RuleDef (EBNF g))) =
  CF $ RuleDef $ EBNF $ g

-- ** Type 'Error_Term_Gram'
data Error_Term_Gram
 = Error_Term_Gram_Fixity Error_Fixity
 | Error_Term_Gram_Cannot_apply_term
 | Error_Term_Gram_Cannot_apply_type
 | Error_Term_Gram_Undefined_term
 | Error_Term_Gram_Term_incomplete
 deriving (Eq, Show)

-- * Class 'Gram_Term'
class
 ( Alternative g
 , Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta meta g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Error g
 , Gram_Term_AtomsR meta ts ts g
 , Gram_Term_Name g
 , Gram_Term_Type meta g
 , Gram_Type meta g
 ) => Gram_Term ts meta g where
 -- | Wrap 'term_abst'. Useful for modifying the scope of the body.
 term_abst_args_body
  :: CF g [(Term_Name, TokType meta)]
  -> CF g (EToken meta ts)
  -> CF g ([(Term_Name, TokType meta)], EToken meta ts)
 term_abst_args_body args body = (,) <$> args <*> body
 term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a

 termG
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 termG = rule "term" $
  choice $
   [ term_abst
   , term_let
   , term_operators
   ]
 term_operators
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 term_operators = rule "term_operators" $
  term_unError $
  term_unError $
  left Error_Term_Gram_Fixity <$>
  operators
   (Right <$> term_app)
   (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix)
   (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix)
   (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix)
  where
  bqG :: Gram_Terminal g' => g' Char
  bqG = char '`'
  op_infix name toks meta = do
   (_pre, in_, _post) <- protok name toks
   return $
    (term_fixity in_,) $ \ma mb -> do
     a <- ma
     b <- mb
     unProTok =<< term_protok in_ meta `protok_app` [Right a, Right b]
  op_prefix name toks meta = do
   (pre, _in_, _post) <- protok name toks
   case pre of
    Just p ->
     Right $ (term_fixity p,) $ (=<<) $ \a ->
      unProTok =<< term_protok p meta `protok_app` [Right a]
    Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
  op_postfix name toks meta = do
   (_pre, _in_, post) <- protok name toks
   case post of
    Just p ->
     Right $ (term_fixity p,) $ (=<<) $ \a ->
      unProTok =<< term_protok p meta `protok_app` [Right a]
    Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
 term_op_postfix :: CF g (Mod Term_Name)
 term_op_postfix = rule "term_op_postfix" $
  lexeme $
   bqG *> term_mod_idname <+> -- <* (cf_of_term $ Gram.Term (pure ' ') `but` bqG)
   term_mod_opname
 term_op_infix :: CF g (Mod Term_Name)
 term_op_infix = rule "term_op_infix" $
  lexeme $
   between bqG bqG term_mod_idname <+>
   term_mod_opname
 term_op_prefix :: CF g (Mod Term_Name)
 term_op_prefix = rule "term_op_prefix" $
  lexeme $
   term_mod_idname <* bqG <+>
   term_mod_opname
 term_app
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 term_app = rule "term_app" $
  term_unError $
  (\a as -> unProTok =<< protok_app a as)
   <$> term_atom_proto
   <*> many term_atom
 term_atom
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (Either (EToken meta '[Proxy Token_Type])
                  (EToken meta ts))
 term_atom = rule "term_atom" $
  (Left <$ char '@' <*> typeG) <+>
  (Right <$> term_unError (unProTok <$> term_atom_proto))
 term_atom_proto
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (ProTok meta ts)
 term_atom_proto =
  choice $
   term_atomsR (Proxy @ts) <>
   [ metaG $ ((\(_, in_, _) -> term_protok in_) <$>) $ term_unError $ term_tokenizers $
     protok <$> term_mod_name
   , ProTok <$> term_group
   ]
 term_group
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 term_group = rule "term_group" $ parens termG
 term_abst
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 term_abst = rule "term_abst" $
  metaG $
   ((\(xs, te) meta ->
     List.foldr (\(x, ty_x) ->
      inj_etoken meta .
      Token_Term_Abst x ty_x) te xs) <$>) $
   term_abst_args_body
    (symbol "\\" *> some term_abst_decl <* symbol "->")
    termG
 term_let
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 term_let = rule "term_let" $
  metaG $
   (\name args bound body meta ->
    inj_etoken meta $
    Token_Term_Let name
     (List.foldr
      (\(x, ty_x) -> inj_etoken meta . Token_Term_Abst x ty_x) bound args
     ) body)
   <$ symbol "let"
   <*> term_name
   <*> many term_abst_decl
   <* symbol "="
   <*> termG
   <* symbol "in"
   <*> termG

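-- Some concrete syntax accepted by the rules above (illustrative only,
-- with hypothetical identifiers and types):
--
--   \(x : t) -> x            term_abst
--   let y (x : t) = x in y   term_let
--   f x @t y                 term_app ('@' applies a type argument)
--   x + y,  x `f` y          term_operators (symbolic or backquoted infix)
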
deriving instance
 ( Gram_Term ts meta g
 , Gram_Term_AtomsR meta ts ts (CF g)
 ) => Gram_Term ts meta (CF g)
instance
 Gram_Term_AtomsR meta ts ts EBNF =>
 Gram_Term ts meta EBNF where
 term_tokenizers (CF (EBNF g)) = CF $ EBNF g
instance
 Gram_Term_AtomsR meta ts ts RuleDef =>
 Gram_Term ts meta RuleDef where
 term_tokenizers (CF (RuleDef (EBNF g))) =
  CF $ RuleDef $ EBNF $ g

-- ** Class 'Gram_Term_AtomsR'
class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
 term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
instance Gram_Term_AtomsR meta ts '[] g where
 term_atomsR _rs = []
instance
 ( Gram_Term_AtomsT meta ts t g
 , Gram_Term_AtomsR meta ts rs g
 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
 term_atomsR _ =
  term_atomsT (Proxy @t) <>
  term_atomsR (Proxy @rs)

-- ** Class 'Gram_Term_AtomsT'
class Gram_Term_AtomsT meta ts t g where
 term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
 term_atomsT _t = []

gram_term
 :: forall g.
 ( Gram_Term '[Proxy (->), Proxy Integer] () g
 ) => [CF g ()]
gram_term =
 [ ue termG
 , ue term_operators
 , ue term_app
 , ug term_atom
 , ue term_group
 , ue term_abst
 , void (term_abst_decl::CF g (Term_Name, TokType ()))
 , ue term_let
 , void term_mod_name
 , void term_name
 , void term_idname
 , void $ cf_of_term term_idname_tail
 , void $ cf_of_reg term_keywords
 , void term_mod_opname
 , void term_opname
 , void $ cf_of_term term_opname_ok
 , void $ cf_of_reg term_keysyms
 ] where
 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
 ue = (() <$)
 -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
 -- uf = (() <$)
 ug :: CF g (Either (EToken () '[Proxy Token_Type])
             (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()
 ug = (() <$)