1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.Symantic.Compiling.Term.Grammar where
10 import Control.Arrow (left)
11 import Control.Monad (foldM, void, (=<<))
12 import qualified Data.Char as Char
13 import qualified Data.Function as Fun
14 import qualified Data.List as List
15 import Data.Map.Strict (Map)
16 import qualified Data.Map.Strict as Map
17 import Data.Monoid ((<>))
18 import Data.Proxy (Proxy(..))
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Prelude hiding (mod, not, any)
23 import Language.Symantic.Parsing
24 import Language.Symantic.Typing
27 newtype Term_Name = Term_Name Text
28 deriving (Eq, Ord, Show)
31 -- | Proto 'EToken'. It's almost like a free monad,
32 -- but has a third constructor ('ProTokPi')
33 -- to require a type argument.
35 -- NOTE: this type may one day be removed
36 -- if proper type inferencing is done.
37 -- In the meantime it is used to require
38 -- term or type arguments needed to build
39 -- the 'EToken's of polymorphic terms.
41 = ProTokLam (EToken meta ts -> ProTok meta ts)
42 -- ^ Require a term argument.
43 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
44 -- ^ Require a type argument.
45 | ProTok (EToken meta ts)
46 -- ^ No need for any argument.
48 -- | Declared here and not in @Compiling.Lambda@
49 -- to be able to use 'Token_Term_Var' in 'protok'.
50 data instance TokenT meta (ts::[*]) (Proxy (->))
51 = Token_Term_Abst Term_Name (EToken meta '[Proxy Token_Type]) (EToken meta ts)
52 | Token_Term_App (EToken meta ts) (EToken meta ts)
53 | Token_Term_Let Term_Name (EToken meta ts) (EToken meta ts)
54 | Token_Term_Var Term_Name
55 | Token_Term_Compose (EToken meta ts) (EToken meta ts)
59 = TokenizeR meta ts ts
61 -- ** Type 'Tokenizers'
62 data Tokenizers meta ts
64 { tokenizers_prefix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
65 , tokenizers_infix :: Map Mod_Path (Map Term_Name (Term_ProTok Infix meta ts))
66 , tokenizers_postfix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
68 instance Monoid (Tokenizers meta ts) where
69 mempty = Tokenizers Map.empty Map.empty Map.empty
72 (Map.unionWith Map.union
74 (tokenizers_prefix y))
75 (Map.unionWith Map.union
78 (Map.unionWith Map.union
79 (tokenizers_postfix x)
80 (tokenizers_postfix y))
82 data Term_ProTok fixy meta ts
84 { term_protok :: meta -> ProTok meta ts
88 tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
89 tokenizers = tokenizeR (Proxy @ts)
93 -> Either Error_Term_Gram (EToken meta ts)
94 unProTok (ProTok t) = Right t
95 unProTok _ = Left Error_Term_Gram_Term_incomplete
98 :: Inj_Token meta ts (->)
100 -> Tokenizers meta ts
101 -> Either Error_Term_Gram
102 ( Maybe (Term_ProTok Unifix meta ts)
103 , Term_ProTok Infix meta ts
104 , Maybe (Term_ProTok Unifix meta ts)
106 protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
107 let pre = Map.lookup mod pres >>= Map.lookup tn
108 let post = Map.lookup mod posts >>= Map.lookup tn
109 in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
110 return (pre, in_, post)
112 var_or_err (Just x) = Right x
115 [] -> Right (var infixN5)
116 _ -> Left $ Error_Term_Gram_Undefined_term
119 { term_protok = \meta -> ProTok $ inj_etoken meta $ Token_Term_Var tn
124 :: Inj_Token meta ts (->)
126 -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
127 -> Either Error_Term_Gram (ProTok meta ts)
133 ProTokPi g -> Right $ g typ
134 _ -> Left Error_Term_Gram_Cannot_apply_type
137 ProTokLam f -> Right $ f te
138 ProTok tok@(EToken e) -> Right $
139 ProTok $ inj_etoken (meta_of e) $
140 Token_Term_App tok te
141 _ -> Left Error_Term_Gram_Cannot_apply_term
143 -- ** Class 'TokenizeR'
144 class TokenizeR meta (ts::[*]) (rs::[*]) where
145 tokenizeR :: Proxy rs -> Tokenizers meta ts
146 instance TokenizeR meta ts '[] where
147 tokenizeR _rs = mempty
149 ( TokenizeT meta ts t
150 , TokenizeR meta ts rs
151 ) => TokenizeR meta ts (t ': rs) where
153 tokenizeR (Proxy @rs) `mappend`
156 -- ** Class 'TokenizeT'
157 class TokenizeT meta ts t where
158 tokenizeT :: Proxy t -> Tokenizers meta ts
159 -- tokenizeT _t = [] `Mod` []
160 tokenizeT _t = mempty
164 -> [(Term_Name, Term_ProTok fix meta ts)]
165 -> Map Mod_Path (Map Term_Name (Term_ProTok fix meta ts))
166 tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl
169 :: Inj_Token meta ts t
170 => Text -> fixity -> TokenT meta ts (Proxy t)
171 -> (Term_Name, Term_ProTok fixity meta ts)
172 tokenize0 n term_fixity tok =
173 (Term_Name n,) Term_ProTok
174 { term_protok = \meta -> ProTok $ inj_etoken meta $ tok
178 :: Inj_Token meta ts t
180 -> (EToken meta ts -> TokenT meta ts (Proxy t))
181 -> (Term_Name, Term_ProTok fixity meta ts)
182 tokenize1 n term_fixity tok =
183 (Term_Name n,) Term_ProTok
184 { term_protok = \meta ->
186 ProTok $ inj_etoken meta $ tok a
190 :: Inj_Token meta ts t
192 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
193 -> (Term_Name, Term_ProTok fixity meta ts)
194 tokenize2 n term_fixity tok =
195 (Term_Name n,) Term_ProTok
196 { term_protok = \meta ->
197 ProTokLam $ \a -> ProTokLam $ \b ->
198 ProTok $ inj_etoken meta $ tok a b
203 :: Inj_Token meta ts t
205 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
206 -> (Term_Name, Term_ProTok fixity meta ts)
207 tokenize3 n term_fixity tok =
208 (Term_Name n,) Term_ProTok
209 { term_protok = \meta ->
210 ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
211 ProTok $ inj_etoken meta $ tok a b c
216 type Mod_Path = [Mod_Name]
217 newtype Mod_Name = Mod_Name Text
218 deriving (Eq, Ord, Show)
219 data Mod a = Mod Mod_Path a
220 deriving (Eq, Functor, Ord, Show)
222 -- * Class 'Gram_Term_Name'
233 ) => Gram_Term_Name g where
234 mod_path :: CF g Mod_Path
235 mod_path = rule "mod_path" $
240 mod_name :: CF g Mod_Name
241 mod_name = rule "mod_name" $
242 (Mod_Name . Text.pack <$>) $
246 <*. (any `but` term_idname_tail))
248 identG = (:) <$> headG <*> many (cf_of_term term_idname_tail)
249 headG = unicat $ Unicat Char.UppercaseLetter
251 term_mod_name :: CF g (Mod Term_Name)
252 term_mod_name = rule "term_mod_name" $
255 parens term_mod_opname
256 term_name :: CF g Term_Name
257 term_name = rule "term_name" $
262 term_mod_idname :: CF g (Mod Term_Name)
263 term_mod_idname = rule "term_mod_idname" $
265 <$> option [] (mod_path <* char '.')
267 term_idname :: CF g Term_Name
268 term_idname = rule "term_idname" $
269 (Term_Name . Text.pack <$>) $
273 <*. (any `but` term_idname_tail)
275 identG = (:) <$> headG <*> many (cf_of_term term_idname_tail)
276 headG = unicat $ Unicat_Letter
277 term_idname_tail :: Terminal g Char
278 term_idname_tail = rule "term_idname_tail" $
279 unicat Unicat_Letter <+>
281 term_keywords :: Reg rl g String
282 term_keywords = rule "term_keywords" $
283 choice $ string <$> ["in", "let"]
285 term_mod_opname :: CF g (Mod Term_Name)
286 term_mod_opname = rule "term_mod_opname" $
288 <$> option [] (mod_path <* char '.')
290 term_opname :: CF g Term_Name
291 term_opname = rule "term_opname" $
292 (Term_Name . Text.pack <$>) $
296 <*. (any `but` term_opname_ok)
298 symG = some $ cf_of_term $ term_opname_ok
299 term_opname_ok :: Terminal g Char
300 term_opname_ok = rule "term_opname_ok" $
307 koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
308 term_keysyms :: Reg rl g String
309 term_keysyms = rule "term_keysyms" $
310 choice $ string <$> ["\\", "->", "="]
312 deriving instance Gram_Term_Name g => Gram_Term_Name (CF g)
313 instance Gram_Term_Name EBNF
314 instance Gram_Term_Name RuleDef
316 -- * Class 'Gram_Term_Type'
328 ) => Gram_Term_Type meta g where
330 :: CF g (Term_Name, TokType meta)
331 term_abst_decl = rule "term_abst_decl" $
337 deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
338 instance Gram_Term_Type meta EBNF
339 instance Gram_Term_Type meta RuleDef
341 -- * Class 'Gram_Error'
342 class Gram_Error g where
343 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
344 deriving instance Gram_Error g => Gram_Error (CF g)
345 instance Gram_Error EBNF where
346 term_unError (CF (EBNF g)) = CF $ EBNF g
347 instance Gram_Error RuleDef where
348 term_unError (CF (RuleDef (EBNF g))) =
349 CF $ RuleDef $ EBNF $ g
351 -- ** Type 'Error_Term_Gram'
353 = Error_Term_Gram_Fixity Error_Fixity
354 | Error_Term_Gram_Cannot_apply_term
355 | Error_Term_Gram_Cannot_apply_type
356 | Error_Term_Gram_Undefined_term
357 | Error_Term_Gram_Term_incomplete
360 -- * Class 'Gram_Term'
371 , Gram_Term_AtomsR meta ts ts g
373 , Gram_Term_Type meta g
375 ) => Gram_Term ts meta g where
376 -- | Wrap 'term_abst'. Useful to modify body's scope.
378 :: CF g [(Term_Name, TokType meta)]
379 -> CF g (EToken meta ts)
380 -> CF g ([(Term_Name, TokType meta)], EToken meta ts)
381 term_abst_args_body args body = (,) <$> args <*> body
382 term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a
385 :: Inj_Tokens meta ts '[Proxy (->)]
386 => CF g (EToken meta ts)
387 termG = rule "term" $
394 :: Inj_Tokens meta ts '[Proxy (->)]
395 => CF g (EToken meta ts)
396 term_operators = rule "term_operators" $
399 left Error_Term_Gram_Fixity <$>
402 (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix)
403 (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix)
404 (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix)
406 bqG :: Gram_Terminal g' => g' Char
408 op_infix name toks meta = do
409 (_pre, in_, _post) <- protok name toks
411 (term_fixity in_,) $ \ma mb -> do
414 unProTok =<< term_protok in_ meta `protok_app` [Right a, Right b]
415 op_prefix name toks meta = do
416 (pre, _in_, _post) <- protok name toks
419 Right $ (term_fixity p,) $ (=<<) $ \a ->
420 unProTok =<< term_protok p meta `protok_app` [Right a]
421 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
422 op_postfix name toks meta = do
423 (_pre, _in_, post) <- protok name toks
426 Right $ (term_fixity p,) $ (=<<) $ \a ->
427 unProTok =<< term_protok p meta `protok_app` [Right a]
428 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
429 term_op_postfix :: CF g (Mod Term_Name)
430 term_op_postfix = rule "term_op_postfix" $
432 bqG *> term_mod_idname <+> -- <* (cf_of_term $ Gram.Term (pure ' ') `but` bqG)
434 term_op_infix :: CF g (Mod Term_Name)
435 term_op_infix = rule "term_op_infix" $
437 between bqG bqG term_mod_idname <+>
439 term_op_prefix :: CF g (Mod Term_Name)
440 term_op_prefix = rule "term_op_prefix" $
442 term_mod_idname <* bqG <+>
445 :: Inj_Tokens meta ts '[Proxy (->)]
446 => CF g (EToken meta ts)
447 term_app = rule "term_app" $
449 (\a as -> unProTok =<< protok_app a as)
453 :: Inj_Tokens meta ts '[Proxy (->)]
454 => CF g (Either (EToken meta '[Proxy Token_Type])
456 term_atom = rule "term_atom" $
457 (Left <$ char '@' <*> typeG) <+>
458 (Right <$> term_unError (unProTok <$> term_atom_proto))
460 :: Inj_Tokens meta ts '[Proxy (->)]
461 => CF g (ProTok meta ts)
464 term_atomsR (Proxy @ts) <>
465 [ metaG $ ((\(_, in_, _) -> term_protok in_) <$>) $ term_unError $ term_tokenizers $
466 protok <$> term_mod_name
467 , ProTok <$> term_group
470 :: Inj_Tokens meta ts '[Proxy (->)]
471 => CF g (EToken meta ts)
472 term_group = rule "term_group" $ parens termG
474 :: Inj_Tokens meta ts '[Proxy (->)]
475 => CF g (EToken meta ts)
476 term_abst = rule "term_abst" $
479 List.foldr (\(x, ty_x) ->
481 Token_Term_Abst x ty_x) te xs) <$>) $
483 (symbol "\\" *> some term_abst_decl <* symbol "->")
486 :: Inj_Tokens meta ts '[Proxy (->)]
487 => CF g (EToken meta ts)
488 term_let = rule "term_let" $
490 (\name args bound body meta ->
494 (\(x, ty_x) -> inj_etoken meta . Token_Term_Abst x ty_x) bound args
498 <*> many term_abst_decl
505 ( Gram_Term ts meta g
506 , Gram_Term_AtomsR meta ts ts (CF g)
507 ) => Gram_Term ts meta (CF g)
509 Gram_Term_AtomsR meta ts ts EBNF =>
510 Gram_Term ts meta EBNF where
511 term_tokenizers (CF (EBNF g)) = CF $ EBNF g
513 Gram_Term_AtomsR meta ts ts RuleDef =>
514 Gram_Term ts meta RuleDef where
515 term_tokenizers (CF (RuleDef (EBNF g))) =
516 CF $ RuleDef $ EBNF $ g
518 -- ** Class 'Gram_Term_AtomsR'
519 class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
520 term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
521 instance Gram_Term_AtomsR meta ts '[] g where
524 ( Gram_Term_AtomsT meta ts t g
525 , Gram_Term_AtomsR meta ts rs g
526 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
528 term_atomsT (Proxy @t) <>
529 term_atomsR (Proxy @rs)
531 -- ** Class 'Gram_Term_AtomsT'
532 class Gram_Term_AtomsT meta ts t g where
533 term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
535 instance Gram_Term_AtomsT meta ts t RuleDef
539 ( Gram_Term '[Proxy (->), Proxy Integer] () g
548 , void (term_abst_decl::CF g (Term_Name, TokType ()))
553 , void $ cf_of_term term_idname_tail
554 , void $ cf_of_reg term_keywords
555 , void term_mod_opname
557 , void $ cf_of_term term_opname_ok
558 , void $ cf_of_reg term_keysyms
560 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
562 -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
564 ug :: CF g (Either (EToken () '[Proxy Token_Type])
565 (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()