{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.Symantic.Compiling.Term.Grammar where

import Control.Arrow (left)
import Control.Monad (foldM, void, (=<<))
import qualified Data.Char as Char
import qualified Data.Function as Fun
import qualified Data.List as List
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Monoid ((<>))
import Data.Proxy (Proxy(..))
import Data.Text (Text)
import qualified Data.Text as Text
import Prelude hiding (mod, not, any)

import Language.Symantic.Parsing
import Language.Symantic.Typing

-- * Type 'Term_Name'
newtype Term_Name = Term_Name Text
  deriving (Eq, Ord, Show)

-- * Type 'ProTok'
-- | Proto 'EToken'. It's almost like a free monad,
-- but has a third constructor ('ProTokPi')
-- to require a type argument.
--
-- NOTE: this type may one day be removed
-- if proper type inferencing is done.
-- In the meantime it is used to require
-- term or type arguments needed to build
-- the 'EToken's of polymorphic terms.
data ProTok meta ts
  = ProTokLam (EToken meta ts -> ProTok meta ts)
    -- ^ Require a term argument.
  | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
    -- ^ Require a type argument.
  | ProTok (EToken meta ts)
    -- ^ No need for any argument.
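
-- A minimal illustration of how these constructors are meant to be
-- consumed (hypothetical token constructor @tok@ and metadata @meta@;
-- see 'protok_app' and 'tokenize2' below for the actual uses):
--
-- > ProTokLam $ \a -> ProTokLam $ \b -> ProTok $ inj_EToken meta $ tok a b
--
-- is a 'ProTok' still waiting for two term arguments @a@ and @b@
-- before yielding a plain 'EToken'.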

-- | Declared here and not in @Compiling.Lambda@
-- to be able to use 'Token_Term_Var' in 'protok'.
data instance TokenT meta (ts::[*]) (Proxy (->))
  = Token_Term_Abst Term_Name (EToken meta '[Proxy Token_Type]) (EToken meta ts)
  | Token_Term_App (EToken meta ts) (EToken meta ts)
  | Token_Term_Let Term_Name (EToken meta ts) (EToken meta ts)
  | Token_Term_Var Term_Name
  | Token_Term_Compose (EToken meta ts) (EToken meta ts)

-- * Class 'Tokenize'
type Tokenize meta ts
  = TokenizeR meta ts ts

-- ** Type 'Tokenizers'
data Tokenizers meta ts
  = Tokenizers
    { tokenizers_prefix  :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
    , tokenizers_infix   :: Map Mod_Path (Map Term_Name (Term_ProTok Infix meta ts))
    , tokenizers_postfix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
    }
instance Monoid (Tokenizers meta ts) where
  mempty = Tokenizers Map.empty Map.empty Map.empty
  mappend x y =
    Tokenizers
      (Map.unionWith Map.union
        (tokenizers_prefix x)
        (tokenizers_prefix y))
      (Map.unionWith Map.union
        (tokenizers_infix x)
        (tokenizers_infix y))
      (Map.unionWith Map.union
        (tokenizers_postfix x)
        (tokenizers_postfix y))
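
-- Merging is per 'Mod_Path'; within a module, 'Map.union' is left-biased,
-- so on a 'Term_Name' clash the left operand of 'mappend' wins.
-- A sketch (hypothetical tokenizers @tx@ and @ty@ both defining @"+"@):
--
-- > tokenizers_infix (tx `mappend` ty)  -- keeps the "+" entry of tx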

data Term_ProTok fixy meta ts
  = Term_ProTok
    { term_protok :: meta -> ProTok meta ts
    , term_fixity :: fixy
    }

tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
tokenizers = tokenizeR (Proxy @ts)

unProTok
  :: ProTok meta ts
  -> Either Error_Term_Gram (EToken meta ts)
unProTok (ProTok t) = Right t
unProTok _ = Left Error_Term_Gram_Term_incomplete
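
-- For example (a sketch): @unProTok (ProTok t)@ is @Right t@, whereas a
-- 'ProTokLam' or 'ProTokPi' still awaiting an argument yields
-- 'Error_Term_Gram_Term_incomplete'.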

protok
  :: Inj_Token meta ts (->)
  => Mod Term_Name
  -> Tokenizers meta ts
  -> Either Error_Term_Gram
       ( Maybe (Term_ProTok Unifix meta ts)
       , Term_ProTok Infix meta ts
       , Maybe (Term_ProTok Unifix meta ts)
       )
protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
  let pre = Map.lookup mod pres >>= Map.lookup tn
  let post = Map.lookup mod posts >>= Map.lookup tn
  in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
  return (pre, in_, post)
 where
  var_or_err (Just x) = Right x
  var_or_err Nothing =
    case mod of
      [] -> Right (var infixN5)
      _ -> Left $ Error_Term_Gram_Undefined_term
  var term_fixity =
    Term_ProTok
      { term_protok = \meta -> ProTok $ inj_EToken meta $ Token_Term_Var tn
      , term_fixity
      }
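
-- For example (a sketch): with an empty 'Tokenizers', an unqualified
-- name falls back to a 'Token_Term_Var' with fixity 'infixN5', whereas
-- an unknown qualified name is 'Error_Term_Gram_Undefined_term':
--
-- > protok (Mod [] (Term_Name (Text.pack "x"))) mempty
-- >   -- Right (Nothing, <a variable token for "x">, Nothing)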

protok_app
  :: Inj_Token meta ts (->)
  => ProTok meta ts
  -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
  -> Either Error_Term_Gram (ProTok meta ts)
protok_app =
  foldM app
 where
  app acc (Left typ) =
    case acc of
      ProTokPi g -> Right $ g typ
      _ -> Left Error_Term_Gram_Cannot_apply_type
  app acc (Right te) =
    case acc of
      ProTokLam f -> Right $ f te
      ProTok tok@(EToken e) -> Right $
        ProTok $ inj_EToken (meta_of e) $
          Token_Term_App tok te
      _ -> Left Error_Term_Gram_Cannot_apply_term
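
-- For example (a sketch, for a polymorphic
-- @p = ProTokPi $ \ty -> ProTokLam $ \a -> ...@):
--
-- > protok_app p [Left ty, Right a]
--
-- first instantiates the type argument @ty@, then applies the term
-- argument @a@. Applying a type to anything but a 'ProTokPi' yields
-- 'Error_Term_Gram_Cannot_apply_type'; applying a term to a 'ProTokPi'
-- yields 'Error_Term_Gram_Cannot_apply_term'; applying a term to a plain
-- 'ProTok' wraps it in a 'Token_Term_App'.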

-- ** Class 'TokenizeR'
class TokenizeR meta (ts::[*]) (rs::[*]) where
  tokenizeR :: Proxy rs -> Tokenizers meta ts
instance TokenizeR meta ts '[] where
  tokenizeR _rs = mempty
instance
  ( TokenizeT meta ts t
  , TokenizeR meta ts rs
  ) => TokenizeR meta ts (t ': rs) where
  tokenizeR _ =
    tokenizeR (Proxy @rs) `mappend`
    tokenizeT (Proxy @t)

-- ** Class 'TokenizeT'
class TokenizeT meta ts t where
  tokenizeT :: Proxy t -> Tokenizers meta ts
  -- tokenizeT _t = [] `Mod` []
  tokenizeT _t = mempty

tokenizeTMod
  :: Mod_Path
  -> [(Term_Name, Term_ProTok fix meta ts)]
  -> Map Mod_Path (Map Term_Name (Term_ProTok fix meta ts))
tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl

tokenize0
  :: Inj_Token meta ts t
  => Text -> fixity -> TokenT meta ts (Proxy t)
  -> (Term_Name, Term_ProTok fixity meta ts)
tokenize0 n term_fixity tok =
  (Term_Name n,) Term_ProTok
    { term_protok = \meta -> ProTok $ inj_EToken meta $ tok
    , term_fixity }

tokenize1
  :: Inj_Token meta ts t
  => Text -> fixity
  -> (EToken meta ts -> TokenT meta ts (Proxy t))
  -> (Term_Name, Term_ProTok fixity meta ts)
tokenize1 n term_fixity tok =
  (Term_Name n,) Term_ProTok
    { term_protok = \meta ->
        ProTokLam $ \a ->
          ProTok $ inj_EToken meta $ tok a
    , term_fixity }

tokenize2
  :: Inj_Token meta ts t
  => Text -> fixity
  -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
  -> (Term_Name, Term_ProTok fixity meta ts)
tokenize2 n term_fixity tok =
  (Term_Name n,) Term_ProTok
    { term_protok = \meta ->
        ProTokLam $ \a -> ProTokLam $ \b ->
          ProTok $ inj_EToken meta $ tok a b
    , term_fixity
    }

tokenize3
  :: Inj_Token meta ts t
  => Text -> fixity
  -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
  -> (Term_Name, Term_ProTok fixity meta ts)
tokenize3 n term_fixity tok =
  (Term_Name n,) Term_ProTok
    { term_protok = \meta ->
        ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
          ProTok $ inj_EToken meta $ tok a b c
    , term_fixity
    }
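
-- A sketch of how these helpers are typically combined in a 'TokenizeT'
-- instance (the token type @Foo@ and its constructor @Token_Term_Foo_add@
-- are hypothetical, for illustration only):
--
-- > instance Inj_Token meta ts Foo => TokenizeT meta ts (Proxy Foo) where
-- >   tokenizeT _t = mempty
-- >     { tokenizers_infix = tokenizeTMod []
-- >        [ tokenize2 (Text.pack "+") infixN5 Token_Term_Foo_add ]
-- >     }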

-- * Type 'Mod'
type Mod_Path = [Mod_Name]
newtype Mod_Name = Mod_Name Text
  deriving (Eq, Ord, Show)
data Mod a = Mod Mod_Path a
  deriving (Eq, Functor, Ord, Show)
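
-- For example (a sketch): the qualified name @Foo.Bar.baz@ is represented
-- as @Mod [Mod_Name (Text.pack "Foo"), Mod_Name (Text.pack "Bar")]
-- (Term_Name (Text.pack "baz"))@, and an unqualified @baz@ uses the
-- empty 'Mod_Path' @[]@.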

-- * Class 'Gram_Term_Name'
class
  ( Alt g
  , Alter g
  , App g
  , Gram_CF g
  , Gram_Op g
  , Gram_Lexer g
  , Gram_RegL g
  , Gram_Rule g
  , Gram_Terminal g
  ) => Gram_Term_Name g where
  mod_path :: CF g Mod_Path
  mod_path = rule "mod_path" $
    infixrG
      (pure <$> mod_name)
      (op <$ char '.')
    where op = mappend
  mod_name :: CF g Mod_Name
  mod_name = rule "mod_name" $
    (Mod_Name . Text.pack <$>) $
    identG `minus`
    (Fun.const
      <$> term_keywords
      <*. (any `but` term_idname_tail))
    where
    identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
    headG = unicat $ Unicat Char.UppercaseLetter

  term_mod_name :: CF g (Mod Term_Name)
  term_mod_name = rule "term_mod_name" $
    lexeme $
      term_mod_idname <+>
      parens term_mod_opname
  term_name :: CF g Term_Name
  term_name = rule "term_name" $
    lexeme $
      term_idname <+>
      parens term_opname

  term_mod_idname :: CF g (Mod Term_Name)
  term_mod_idname = rule "term_mod_idname" $
    Mod
      <$> option [] (mod_path <* char '.')
      <*> term_idname
  term_idname :: CF g Term_Name
  term_idname = rule "term_idname" $
    (Term_Name . Text.pack <$>) $
    (identG `minus`) $
    Fun.const
      <$> term_keywords
      <*. (any `but` term_idname_tail)
    where
    identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
    headG = unicat $ Unicat_Letter
  term_idname_tail :: Terminal g Char
  term_idname_tail = rule "term_idname_tail" $
    unicat Unicat_Letter <+>
    unicat Unicat_Number
  term_keywords :: Reg rl g String
  term_keywords = rule "term_keywords" $
    choice $ string <$> ["in", "let"]

  term_mod_opname :: CF g (Mod Term_Name)
  term_mod_opname = rule "term_mod_opname" $
    Mod
      <$> option [] (mod_path <* char '.')
      <*> term_opname
  term_opname :: CF g Term_Name
  term_opname = rule "term_opname" $
    (Term_Name . Text.pack <$>) $
    (symG `minus`) $
    Fun.const
      <$> term_keysyms
      <*. (any `but` term_opname_ok)
    where
    symG = some $ cf_of_Terminal $ term_opname_ok
  term_opname_ok :: Terminal g Char
  term_opname_ok = rule "term_opname_ok" $
    choice (unicat <$>
      [ Unicat_Symbol
      , Unicat_Punctuation
      , Unicat_Mark
      ]) `but` koG
    where
    koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
  term_keysyms :: Reg rl g String
  term_keysyms = rule "term_keysyms" $
    choice $ string <$> ["\\", "->", "="]

deriving instance Gram_Term_Name g => Gram_Term_Name (CF g)
instance Gram_Term_Name EBNF
instance Gram_Term_Name RuleDef

-- * Class 'Gram_Term_Type'
class
  ( Alt g
  , Alter g
  , App g
  , Gram_CF g
  , Gram_Lexer g
  , Gram_Meta meta g
  , Gram_Rule g
  , Gram_Terminal g
  , Gram_Term_Name g
  , Gram_Type meta g
  ) => Gram_Term_Type meta g where
  term_abst_decl
    :: CF g (Term_Name, TokType meta)
  term_abst_decl = rule "term_abst_decl" $
    parens $ (,)
      <$> term_name
      <* symbol ":"
      <*> typeG

deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
instance Gram_Term_Type meta EBNF
instance Gram_Term_Type meta RuleDef

-- * Class 'Gram_Error'
class Gram_Error g where
  term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
  term_unError (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
  term_unError (CF (RuleDef (EBNF g))) =
    CF $ RuleDef $ EBNF $ g

-- ** Type 'Error_Term_Gram'
data Error_Term_Gram
  = Error_Term_Gram_Fixity Error_Fixity
  | Error_Term_Gram_Cannot_apply_term
  | Error_Term_Gram_Cannot_apply_type
  | Error_Term_Gram_Undefined_term
  | Error_Term_Gram_Term_incomplete
  deriving (Eq, Show)

-- * Class 'Gram_Term'
class
  ( Alt g
  , Alter g
  , App g
  , Gram_CF g
  , Gram_Lexer g
  , Gram_Meta meta g
  , Gram_Rule g
  , Gram_Terminal g
  , Gram_Error g
  , Gram_Term_AtomsR meta ts ts g
  , Gram_Term_Name g
  , Gram_Term_Type meta g
  , Gram_Type meta g
  ) => Gram_Term ts meta g where
  -- | Wrap 'term_abst'. Useful to modify body's scope.
  term_abst_args_body
    :: CF g [(Term_Name, TokType meta)]
    -> CF g (EToken meta ts)
    -> CF g ([(Term_Name, TokType meta)], EToken meta ts)
  term_abst_args_body args body = (,) <$> args <*> body
  term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a

  termG
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  termG = rule "term" $
    choice $
      [ term_abst
      , term_let
      , term_operators
      ]
  term_operators
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  term_operators = rule "term_operators" $
    term_unError $
    term_unError $
    left Error_Term_Gram_Fixity <$>
    operators
      (Right <$> term_app)
      (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix)
      (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix)
      (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix)
    where
    bqG :: Gram_Terminal g' => g' Char
    bqG = char '`'
    op_infix name toks meta = do
      (_pre, in_, _post) <- protok name toks
      return $
        (term_fixity in_,) $ \ma mb -> do
          a <- ma
          b <- mb
          unProTok =<< term_protok in_ meta `protok_app` [Right a, Right b]
    op_prefix name toks meta = do
      (pre, _in_, _post) <- protok name toks
      case pre of
        Just p ->
          Right $ (term_fixity p,) $ (=<<) $ \a ->
            unProTok =<< term_protok p meta `protok_app` [Right a]
        Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
    op_postfix name toks meta = do
      (_pre, _in_, post) <- protok name toks
      case post of
        Just p ->
          Right $ (term_fixity p,) $ (=<<) $ \a ->
            unProTok =<< term_protok p meta `protok_app` [Right a]
        Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
    term_op_postfix :: CF g (Mod Term_Name)
    term_op_postfix = rule "term_op_postfix" $
      lexeme $
        bqG *> term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` bqG)
        term_mod_opname
    term_op_infix :: CF g (Mod Term_Name)
    term_op_infix = rule "term_op_infix" $
      lexeme $
        between bqG bqG term_mod_idname <+>
        term_mod_opname
    term_op_prefix :: CF g (Mod Term_Name)
    term_op_prefix = rule "term_op_prefix" $
      lexeme $
        term_mod_idname <* bqG <+>
        term_mod_opname
  term_app
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  term_app = rule "term_app" $
    term_unError $
    (\a as -> unProTok =<< protok_app a as)
      <$> term_atom_proto
      <*> many term_atom
  term_atom
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (Either (EToken meta '[Proxy Token_Type])
                    (EToken meta ts))
  term_atom = rule "term_atom" $
    (Left <$ char '@' <*> typeG) <+>
    (Right <$> term_unError (unProTok <$> term_atom_proto))
  term_atom_proto
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (ProTok meta ts)
  term_atom_proto =
    choice $
      term_atomsR (Proxy @ts) <>
      [ metaG $ ((\(_, in_, _) -> term_protok in_) <$>) $ term_unError $ term_tokenizers $
          protok <$> term_mod_name
      , ProTok <$> term_group
      ]
  term_group
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  term_group = rule "term_group" $ parens termG
  term_abst
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  term_abst = rule "term_abst" $
    metaG $
    ((\(xs, te) meta ->
      List.foldr (\(x, ty_x) ->
        inj_EToken meta .
        Token_Term_Abst x ty_x) te xs) <$>) $
    term_abst_args_body
      (symbol "\\" *> some term_abst_decl <* symbol "->")
      termG
  term_let
    :: Inj_Tokens meta ts '[Proxy (->)]
    => CF g (EToken meta ts)
  term_let = rule "term_let" $
    metaG $
    (\name args bound body meta ->
      inj_EToken meta $
      Token_Term_Let name
        (List.foldr
          (\(x, ty_x) -> inj_EToken meta . Token_Term_Abst x ty_x) bound args
        ) body)
      <$ symbol "let"
      <*> term_name
      <*> many term_abst_decl
      <* symbol "="
      <*> termG
      <* symbol "in"
      <*> termG

deriving instance
  ( Gram_Term ts meta g
  , Gram_Term_AtomsR meta ts ts (CF g)
  ) => Gram_Term ts meta (CF g)
instance
  Gram_Term_AtomsR meta ts ts EBNF =>
  Gram_Term ts meta EBNF where
  term_tokenizers (CF (EBNF g)) = CF $ EBNF g
instance
  Gram_Term_AtomsR meta ts ts RuleDef =>
  Gram_Term ts meta RuleDef where
  term_tokenizers (CF (RuleDef (EBNF g))) =
    CF $ RuleDef $ EBNF $ g

-- ** Class 'Gram_Term_AtomsR'
class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
  term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
instance Gram_Term_AtomsR meta ts '[] g where
  term_atomsR _rs = []
instance
  ( Gram_Term_AtomsT meta ts t g
  , Gram_Term_AtomsR meta ts rs g
  ) => Gram_Term_AtomsR meta ts (t ': rs) g where
  term_atomsR _ =
    term_atomsT (Proxy @t) <>
    term_atomsR (Proxy @rs)

-- ** Class 'Gram_Term_AtomsT'
class Gram_Term_AtomsT meta ts t g where
  term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
  term_atomsT _t = []
instance Gram_Term_AtomsT meta ts t RuleDef

gram_term
  :: forall g.
     ( Gram_Term '[Proxy (->), Proxy Integer] () g
     ) => [CF g ()]
gram_term =
  [ ue termG
  , ue term_operators
  , ue term_app
  , ug term_atom
  , ue term_group
  , ue term_abst
  , void (term_abst_decl::CF g (Term_Name, TokType ()))
  , ue term_let
  , void term_mod_name
  , void term_name
  , void term_idname
  , void $ cf_of_Terminal term_idname_tail
  , void $ cf_of_Reg term_keywords
  , void term_mod_opname
  , void term_opname
  , void $ cf_of_Terminal term_opname_ok
  , void $ cf_of_Reg term_keysyms
  ] where
  ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
  ue = (() <$)
  -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
  -- uf = (() <$)
  ug :: CF g (Either (EToken () '[Proxy Token_Type])
              (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()
  ug = (() <$)