]> Git — Sourcephile - haskell/symantic.git/blob - Language/Symantic/Compiling/Term/Grammar.hs
Fix module inclusion.
[haskell/symantic.git] / Language / Symantic / Compiling / Term / Grammar.hs
1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.Symantic.Compiling.Term.Grammar where
9
10 import Control.Arrow (left)
11 import Control.Monad (foldM, void, (=<<))
12 import qualified Data.Char as Char
13 import qualified Data.Function as Fun
14 import qualified Data.List as List
15 import Data.Map.Strict (Map)
16 import qualified Data.Map.Strict as Map
17 import Data.Monoid ((<>))
18 import Data.Proxy (Proxy(..))
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Prelude hiding (mod, not, any)
22
23 import Language.Symantic.Parsing
24 import Language.Symantic.Typing
25
-- * Type 'Term_Name'
-- | Name of a term (identifier or operator),
-- wrapped in a newtype to avoid mixing it up with other 'Text' values.
newtype Term_Name = Term_Name Text
 deriving (Eq, Ord, Show)
29
-- * Type 'ProTok'
-- | Proto 'EToken'. It's almost like a free monad,
-- but has a third constructor ('ProTokPi')
-- to require a type argument.
--
-- NOTE: this type may one day be removed
-- if proper type inferencing is done.
-- In the meantime it is used to require
-- term or type arguments needed to build
-- the 'EToken's of polymorphic terms.
data ProTok meta ts
 = ProTokLam (EToken meta ts -> ProTok meta ts)
   -- ^ Require a term argument.
 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
   -- ^ Require a type argument.
 | ProTok (EToken meta ts)
   -- ^ No need for any argument.
47
-- | Declared here and not in @Compiling.Lambda@
-- to be able to use 'Token_Term_Var' in 'protok'.
data instance TokenT meta (ts::[*]) (Proxy (->))
 = Token_Term_Abst Term_Name (EToken meta '[Proxy Token_Type]) (EToken meta ts)
   -- ^ Lambda abstraction: bound name, its type token, and the body.
 | Token_Term_App (EToken meta ts) (EToken meta ts)
   -- ^ Application of a term to a term.
 | Token_Term_Let Term_Name (EToken meta ts) (EToken meta ts)
   -- ^ Let binding: bound name, bound term, and the body.
 | Token_Term_Var Term_Name
   -- ^ Reference to a term by name.
 | Token_Term_Compose (EToken meta ts) (EToken meta ts)
   -- ^ Composition of two terms
   -- (presumably function composition — confirm in @Compiling.Lambda@).
56
-- * Class 'Tokenize'
-- | Convenient alias: tokenizing @ts@ means recursing
-- over the whole type-level list @ts@ with 'TokenizeR'.
type Tokenize meta ts
 = TokenizeR meta ts ts
60
-- ** Type 'Tokenizers'
-- | Tokenizer tables, indexed by module path then term name,
-- one table per fixity flavor (prefix, infix, postfix).
data Tokenizers meta ts
 = Tokenizers
 { tokenizers_prefix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
 , tokenizers_infix :: Map Mod_Path (Map Term_Name (Term_ProTok Infix meta ts))
 , tokenizers_postfix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
 }
-- | Tokenizer tables merge pointwise: module maps are unioned,
-- and per-module name maps are unioned as well
-- (left-biased on duplicate names, per 'Map.union').
instance Monoid (Tokenizers meta ts) where
 mempty = Tokenizers Map.empty Map.empty Map.empty
 mappend x y =
  Tokenizers
   (merge tokenizers_prefix)
   (merge tokenizers_infix)
   (merge tokenizers_postfix)
  where
  -- Union the same field of both operands, two map levels deep.
  merge field = Map.unionWith Map.union (field x) (field y)
81
-- | A tokenizer entry: how to build a 'ProTok' from a @meta@,
-- together with its parsing fixity.
data Term_ProTok fixy meta ts
 = Term_ProTok
 { term_protok :: meta -> ProTok meta ts
 , term_fixity :: fixy
 }
87
-- | Collect the 'Tokenizers' contributed by every type
-- in the type-level list @ts@.
tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
tokenizers = tokenizeR (Proxy @ts)
90
-- | Extract the 'EToken' of a saturated 'ProTok';
-- fail with 'Error_Term_Gram_Term_incomplete'
-- when term or type arguments are still required.
unProTok
 :: ProTok meta ts
 -> Either Error_Term_Gram (EToken meta ts)
unProTok tok =
 case tok of
  ProTok t -> Right t
  _ -> Left Error_Term_Gram_Term_incomplete
96
-- | Look up a (possibly module-qualified) term name
-- in the given 'Tokenizers', returning its prefix, infix
-- and postfix entries.
--
-- Prefix and postfix entries are optional ('Maybe'),
-- but an infix entry is always produced: an unqualified name
-- missing from the infix table falls back to a term variable
-- ('Token_Term_Var') with fixity 'infixN5',
-- whereas a qualified unknown name is an error.
protok
 :: Inj_Token meta ts (->)
 => Mod Term_Name
 -> Tokenizers meta ts
 -> Either Error_Term_Gram
 ( Maybe (Term_ProTok Unifix meta ts)
 , Term_ProTok Infix meta ts
 , Maybe (Term_ProTok Unifix meta ts)
 )
protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
 let pre = Map.lookup mod pres >>= Map.lookup tn
 let post = Map.lookup mod posts >>= Map.lookup tn
 in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
 return (pre, in_, post)
 where
 -- Fall back to a variable token only for unqualified names.
 var_or_err (Just x) = Right x
 var_or_err Nothing =
  case mod of
   [] -> Right (var infixN5)
   _ -> Left $ Error_Term_Gram_Undefined_term
 -- Build a variable entry with the given fixity.
 var term_fixity =
  Term_ProTok
   { term_protok = \meta -> ProTok $ inj_etoken meta $ Token_Term_Var tn
   , term_fixity
   }
122
-- | Apply a 'ProTok' to a list of type ('Left') and term ('Right')
-- arguments, in order.
--
-- A type argument is accepted only by 'ProTokPi';
-- a term argument is accepted by 'ProTokLam', or turned into
-- a 'Token_Term_App' when the accumulator is already a saturated
-- 'ProTok' (reusing the meta of the token being applied).
protok_app
 :: Inj_Token meta ts (->)
 => ProTok meta ts
 -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
 -> Either Error_Term_Gram (ProTok meta ts)
protok_app =
 foldM app
 where
 app acc (Left typ) =
  case acc of
   ProTokPi g -> Right $ g typ
   _ -> Left Error_Term_Gram_Cannot_apply_type
 app acc (Right te) =
  case acc of
   ProTokLam f -> Right $ f te
   ProTok tok@(EToken e) -> Right $
    ProTok $ inj_etoken (meta_of e) $
    Token_Term_App tok te
   _ -> Left Error_Term_Gram_Cannot_apply_term
142
-- ** Class 'TokenizeR'
-- | Recurse over the type-level list @rs@,
-- merging the 'Tokenizers' of each element via 'mappend'.
class TokenizeR meta (ts::[*]) (rs::[*]) where
 tokenizeR :: Proxy rs -> Tokenizers meta ts
-- | Base case: no types left, no tokenizers.
instance TokenizeR meta ts '[] where
 tokenizeR _rs = mempty
-- | Inductive case: tokenizers of the tail, then of the head.
instance
 ( TokenizeT meta ts t
 , TokenizeR meta ts rs
 ) => TokenizeR meta ts (t ': rs) where
 tokenizeR _ =
  tokenizeR (Proxy @rs) `mappend`
  tokenizeT (Proxy @t)
155
-- ** Class 'TokenizeT'
-- | Per-type hook contributing 'Tokenizers' for a single type @t@;
-- defaults to contributing nothing.
class TokenizeT meta ts t where
 tokenizeT :: Proxy t -> Tokenizers meta ts
 -- tokenizeT _t = [] `Mod` []
 tokenizeT _t = mempty
161
-- | Build a tokenizer table for a single module path
-- from an association list of term names.
tokenizeTMod
 :: Mod_Path
 -> [(Term_Name, Term_ProTok fix meta ts)]
 -> Map Mod_Path (Map Term_Name (Term_ProTok fix meta ts))
tokenizeTMod mod tbl = Map.singleton mod (Map.fromList tbl)
167
-- | Build a tokenizer entry for a nullary token:
-- the token takes no term arguments.
tokenize0
 :: Inj_Token meta ts t
 => Text -> fixity -> TokenT meta ts (Proxy t)
 -> (Term_Name, Term_ProTok fixity meta ts)
tokenize0 n term_fixity tok =
 (Term_Name n,) Term_ProTok
 { term_protok = \meta -> ProTok $ inj_etoken meta $ tok
 , term_fixity }
176
-- | Build a tokenizer entry for a token taking one term argument,
-- required through a single 'ProTokLam'.
tokenize1
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> TokenT meta ts (Proxy t))
 -> (Term_Name, Term_ProTok fixity meta ts)
tokenize1 n term_fixity tok =
 (Term_Name n,) Term_ProTok
 { term_protok = \meta ->
  ProTokLam $ \a ->
  ProTok $ inj_etoken meta $ tok a
 , term_fixity }
188
-- | Build a tokenizer entry for a token taking two term arguments,
-- required through two nested 'ProTokLam's.
tokenize2
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
 -> (Term_Name, Term_ProTok fixity meta ts)
tokenize2 n term_fixity tok =
 (Term_Name n,) Term_ProTok
 { term_protok = \meta ->
  ProTokLam $ \a -> ProTokLam $ \b ->
  ProTok $ inj_etoken meta $ tok a b
 , term_fixity
 }
201
-- | Build a tokenizer entry for a token taking three term arguments,
-- required through three nested 'ProTokLam's.
tokenize3
 :: Inj_Token meta ts t
 => Text -> fixity
 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
 -> (Term_Name, Term_ProTok fixity meta ts)
tokenize3 n term_fixity tok =
 (Term_Name n,) Term_ProTok
 { term_protok = \meta ->
  ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
  ProTok $ inj_etoken meta $ tok a b c
 , term_fixity
 }
214
-- * Type 'Mod'
-- | A module path is a list of module names, outermost first.
type Mod_Path = [Mod_Name]
-- | A single module name component.
newtype Mod_Name = Mod_Name Text
 deriving (Eq, Ord, Show)
-- | A value of type @a@ qualified by a module path.
data Mod a = Mod Mod_Path a
 deriving (Eq, Functor, Ord, Show)
221
-- * Class 'Gram_Term_Name'
-- | Grammar rules for term names (identifiers and operators)
-- and module names (dotted paths of uppercase-initial components).
--
-- NOTE: removed a duplicate 'Alter g' constraint
-- that appeared twice in the class context.
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_RegL g
 , Gram_Rule g
 , Gram_Terminal g
 ) => Gram_Term_Name g where
 -- | Dot-separated module path, e.g. @A.B.C@.
 mod_path :: CF g Mod_Path
 mod_path = rule "mod_path" $
  infixrG
   (pure <$> mod_name)
   (op <$ char '.')
  where op = mappend
 -- | A module name: an uppercase-initial identifier
 -- that is not a term keyword.
 mod_name :: CF g Mod_Name
 mod_name = rule "mod_name" $
  (Mod_Name . Text.pack <$>) $
  identG `minus`
  (Fun.const
   <$> term_keywords
   <*. (any `but` term_idname_tail))
  where
  identG = (:) <$> headG <*> many (cf_of_term term_idname_tail)
  headG = unicat $ Unicat Char.UppercaseLetter

 -- | Possibly-qualified term name:
 -- identifier, or operator in parentheses.
 term_mod_name :: CF g (Mod Term_Name)
 term_mod_name = rule "term_mod_name" $
  lexeme $
   term_mod_idname <+>
   parens term_mod_opname
 -- | Unqualified term name:
 -- identifier, or operator in parentheses.
 term_name :: CF g Term_Name
 term_name = rule "term_name" $
  lexeme $
   term_idname <+>
   parens term_opname

 -- | Identifier with an optional module-path qualifier.
 term_mod_idname :: CF g (Mod Term_Name)
 term_mod_idname = rule "term_mod_idname" $
  Mod
   <$> option [] (mod_path <* char '.')
   <*> term_idname
 -- | Alphanumeric identifier that is not a keyword.
 term_idname :: CF g Term_Name
 term_idname = rule "term_idname" $
  (Term_Name . Text.pack <$>) $
  (identG `minus`) $
  Fun.const
   <$> term_keywords
   <*. (any `but` term_idname_tail)
  where
  identG = (:) <$> headG <*> many (cf_of_term term_idname_tail)
  headG = unicat $ Unicat_Letter
 -- | Characters allowed after the head of an identifier.
 term_idname_tail :: Terminal g Char
 term_idname_tail = rule "term_idname_tail" $
  unicat Unicat_Letter <+>
  unicat Unicat_Number
 -- | Reserved identifiers.
 term_keywords :: Reg rl g String
 term_keywords = rule "term_keywords" $
  choice $ string <$> ["in", "let"]

 -- | Operator name with an optional module-path qualifier.
 term_mod_opname :: CF g (Mod Term_Name)
 term_mod_opname = rule "term_mod_opname" $
  Mod
   <$> option [] (mod_path <* char '.')
   <*> term_opname
 -- | Symbolic operator name that is not a reserved symbol.
 term_opname :: CF g Term_Name
 term_opname = rule "term_opname" $
  (Term_Name . Text.pack <$>) $
  (symG `minus`) $
  Fun.const
   <$> term_keysyms
   <*. (any `but` term_opname_ok)
  where
  symG = some $ cf_of_term $ term_opname_ok
 -- | Characters allowed in an operator name:
 -- symbols, punctuation and marks, minus grammar delimiters.
 term_opname_ok :: Terminal g Char
 term_opname_ok = rule "term_opname_ok" $
  choice (unicat <$>
   [ Unicat_Symbol
   , Unicat_Punctuation
   , Unicat_Mark
   ]) `but` koG
  where
  koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
 -- | Reserved symbolic names.
 term_keysyms :: Reg rl g String
 term_keysyms = rule "term_keysyms" $
  choice $ string <$> ["\\", "->", "="]
311
-- | Default (empty) instances: the class methods all have defaults.
deriving instance Gram_Term_Name g => Gram_Term_Name (CF g)
instance Gram_Term_Name EBNF
instance Gram_Term_Name RuleDef
315
-- * Class 'Gram_Term_Type'
-- | Grammar rules for type annotations attached to terms.
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta meta g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Term_Name g
 , Gram_Type meta g
 ) => Gram_Term_Type meta g where
 -- | Parenthesized typed binder: @(name : type)@.
 term_abst_decl
  :: CF g (Term_Name, TokType meta)
 term_abst_decl = rule "term_abst_decl" $
  parens $ (,)
   <$> term_name
   <* symbol ":"
   <*> typeG

-- | Default (empty) instances: the class method has a default.
deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
instance Gram_Term_Type meta EBNF
instance Gram_Term_Type meta RuleDef
340
-- * Class 'Gram_Error'
-- | Lift grammar-level errors of type 'Error_Term_Gram'
-- out of a rule's result.
class Gram_Error g where
 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
-- | For EBNF rendering the error layer is erased:
-- only the underlying grammar shape is kept.
instance Gram_Error EBNF where
 term_unError (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
 term_unError (CF (RuleDef (EBNF g))) =
  CF $ RuleDef $ EBNF $ g
350
-- ** Type 'Error_Term_Gram'
-- | Errors raised while parsing terms.
data Error_Term_Gram
 = Error_Term_Gram_Fixity Error_Fixity
   -- ^ Operator fixity could not be resolved.
 | Error_Term_Gram_Cannot_apply_term
   -- ^ A term argument was given where none is expected.
 | Error_Term_Gram_Cannot_apply_type
   -- ^ A type argument was given where none is expected.
 | Error_Term_Gram_Undefined_term
   -- ^ A qualified term name was not found in the tokenizers.
 | Error_Term_Gram_Term_incomplete
   -- ^ A term still requires arguments (see 'unProTok').
 deriving (Eq, Show)
359
-- * Class 'Gram_Term'
-- | Grammar rules for terms: abstraction, let, operator
-- expressions, application and atoms.
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Lexer g
 , Gram_Meta meta g
 , Gram_Rule g
 , Gram_Terminal g
 , Gram_Error g
 , Gram_Term_AtomsR meta ts ts g
 , Gram_Term_Name g
 , Gram_Term_Type meta g
 , Gram_Type meta g
 ) => Gram_Term ts meta g where
 -- | Wrap 'term_abst'. Useful to modify body's scope.
 term_abst_args_body
  :: CF g [(Term_Name, TokType meta)]
  -> CF g (EToken meta ts)
  -> CF g ([(Term_Name, TokType meta)], EToken meta ts)
 term_abst_args_body args body = (,) <$> args <*> body
 -- | Supply the 'Tokenizers' a rule needs; no default,
 -- each instance decides where the tokenizers come from.
 term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a

 -- | Entry point: a term is an abstraction, a let, or
 -- an operator expression.
 termG
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 termG = rule "term" $
  choice $
  [ term_abst
  , term_let
  , term_operators
  ]
 -- | Operator expression over 'term_app', resolving
 -- prefix/infix/postfix operators via the tokenizers.
 -- Fixity resolution failures are mapped to
 -- 'Error_Term_Gram_Fixity'.
 term_operators
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 term_operators = rule "term_operators" $
  term_unError $
  term_unError $
  left Error_Term_Gram_Fixity <$>
  operators
   (Right <$> term_app)
   (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix)
   (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix)
   (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix)
  where
  -- Backquote, used to promote identifiers to operators.
  bqG :: Gram_Terminal g' => g' Char
  bqG = char '`'
  -- Apply an infix entry to its two operands.
  op_infix name toks meta = do
   (_pre, in_, _post) <- protok name toks
   return $
    (term_fixity in_,) $ \ma mb -> do
     a <- ma
     b <- mb
     unProTok =<< term_protok in_ meta `protok_app` [Right a, Right b]
  -- Apply a prefix entry to its operand; absence is a fixity error.
  op_prefix name toks meta = do
   (pre, _in_, _post) <- protok name toks
   case pre of
    Just p ->
     Right $ (term_fixity p,) $ (=<<) $ \a ->
      unProTok =<< term_protok p meta `protok_app` [Right a]
    Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
  -- Apply a postfix entry to its operand; absence is a fixity error.
  op_postfix name toks meta = do
   (_pre, _in_, post) <- protok name toks
   case post of
    Just p ->
     Right $ (term_fixity p,) $ (=<<) $ \a ->
      unProTok =<< term_protok p meta `protok_app` [Right a]
    Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
  -- Postfix operator: backquoted identifier or bare operator.
  term_op_postfix :: CF g (Mod Term_Name)
  term_op_postfix = rule "term_op_postfix" $
   lexeme $
    bqG *> term_mod_idname <+> -- <* (cf_of_term $ Gram.Term (pure ' ') `but` bqG)
    term_mod_opname
  -- Infix operator: identifier between backquotes or bare operator.
  term_op_infix :: CF g (Mod Term_Name)
  term_op_infix = rule "term_op_infix" $
   lexeme $
    between bqG bqG term_mod_idname <+>
    term_mod_opname
  -- Prefix operator: identifier before a backquote or bare operator.
  term_op_prefix :: CF g (Mod Term_Name)
  term_op_prefix = rule "term_op_prefix" $
   lexeme $
    term_mod_idname <* bqG <+>
    term_mod_opname
 -- | Juxtaposition: an atom applied to zero or more atoms,
 -- then forced to be saturated with 'unProTok'.
 term_app
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 term_app = rule "term_app" $
  term_unError $
  (\a as -> unProTok =<< protok_app a as)
   <$> term_atom_proto
   <*> many term_atom
 -- | Atom in argument position: @\@type@ ('Left')
 -- or a saturated term atom ('Right').
 term_atom
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (Either (EToken meta '[Proxy Token_Type])
                  (EToken meta ts))
 term_atom = rule "term_atom" $
  (Left <$ char '@' <*> typeG) <+>
  (Right <$> term_unError (unProTok <$> term_atom_proto))
 -- | Atom that may still require arguments:
 -- per-type atoms first, then named terms, then groups.
 term_atom_proto
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (ProTok meta ts)
 term_atom_proto =
  choice $
   term_atomsR (Proxy @ts) <>
   [ metaG $ ((\(_, in_, _) -> term_protok in_) <$>) $ term_unError $ term_tokenizers $
    protok <$> term_mod_name
   , ProTok <$> term_group
   ]
 -- | Parenthesized term.
 term_group
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 term_group = rule "term_group" $ parens termG
 -- | Lambda abstraction @\\(x:t) ... -> body@,
 -- desugared to nested 'Token_Term_Abst'.
 term_abst
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 term_abst = rule "term_abst" $
  metaG $
  ((\(xs, te) meta ->
   List.foldr (\(x, ty_x) ->
    inj_etoken meta .
    Token_Term_Abst x ty_x) te xs) <$>) $
  term_abst_args_body
   (symbol "\\" *> some term_abst_decl <* symbol "->")
   termG
 -- | Let binding @let f (x:t) ... = bound in body@;
 -- binder arguments are desugared to 'Token_Term_Abst'
 -- around the bound term.
 term_let
  :: Inj_Tokens meta ts '[Proxy (->)]
  => CF g (EToken meta ts)
 term_let = rule "term_let" $
  metaG $
  (\name args bound body meta ->
   inj_etoken meta $
   Token_Term_Let name
    (List.foldr
     (\(x, ty_x) -> inj_etoken meta . Token_Term_Abst x ty_x) bound args
    ) body)
   <$ symbol "let"
   <*> term_name
   <*> many term_abst_decl
   <* symbol "="
   <*> termG
   <* symbol "in"
   <*> termG
503
-- | Instances for EBNF rendering: 'term_tokenizers' erases
-- the tokenizer layer, keeping only the grammar shape.
deriving instance
 ( Gram_Term ts meta g
 , Gram_Term_AtomsR meta ts ts (CF g)
 ) => Gram_Term ts meta (CF g)
instance
 Gram_Term_AtomsR meta ts ts EBNF =>
 Gram_Term ts meta EBNF where
 term_tokenizers (CF (EBNF g)) = CF $ EBNF g
instance
 Gram_Term_AtomsR meta ts ts RuleDef =>
 Gram_Term ts meta RuleDef where
 term_tokenizers (CF (RuleDef (EBNF g))) =
  CF $ RuleDef $ EBNF $ g
517
-- ** Class 'Gram_Term_AtomsR'
-- | Recurse over the type-level list @rs@,
-- concatenating the atom rules of each element.
class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
 term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
-- | Base case: no types left, no atom rules.
instance Gram_Term_AtomsR meta ts '[] g where
 term_atomsR _rs = []
-- | Inductive case: atoms of the head, then of the tail.
instance
 ( Gram_Term_AtomsT meta ts t g
 , Gram_Term_AtomsR meta ts rs g
 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
 term_atomsR _ =
  term_atomsT (Proxy @t) <>
  term_atomsR (Proxy @rs)
530
-- ** Class 'Gram_Term_AtomsT'
-- | Per-type hook contributing atom rules for a single type @t@;
-- defaults to contributing none.
class Gram_Term_AtomsT meta ts t g where
 term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
 term_atomsT _t = []
instance Gram_Term_AtomsT meta ts t RuleDef
536
-- | All term grammar rules, instantiated at a concrete token list
-- and discarding their results — used to enumerate the rules
-- (e.g. for EBNF rendering).
gram_term
 :: forall g.
 ( Gram_Term '[Proxy (->), Proxy Integer] () g
 ) => [CF g ()]
gram_term =
 [ ue termG
 , ue term_operators
 , ue term_app
 , ug term_atom
 , ue term_group
 , ue term_abst
 , void (term_abst_decl::CF g (Term_Name, TokType ()))
 , ue term_let
 , void term_mod_name
 , void term_name
 , void term_idname
 , void $ cf_of_term term_idname_tail
 , void $ cf_of_reg term_keywords
 , void term_mod_opname
 , void term_opname
 , void $ cf_of_term term_opname_ok
 , void $ cf_of_reg term_keysyms
 ] where
 -- Discard an EToken result.
 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
 ue = (() <$)
 -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
 -- uf = (() <$)
 -- Discard an Either-of-EToken result.
 ug :: CF g (Either (EToken () '[Proxy Token_Type])
                    (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()
 ug = (() <$)