1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.Symantic.Compiling.Term.Grammar where
10 import Control.Arrow (left)
11 import Control.Monad (foldM, void, (=<<))
12 import qualified Data.Char as Char
13 import qualified Data.Function as Fun
14 import Data.Map.Strict (Map)
15 import qualified Data.Map.Strict as Map
16 import Data.Proxy (Proxy(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Text (Text)
19 import qualified Data.Text as Text
20 import Prelude hiding (mod, not, any)
22 import Language.Symantic.Parsing
23 import Language.Symantic.Typing
26 newtype Term_Name = Term_Name Text
27 deriving (Eq, Ord, Show)
30 -- | Proto 'EToken'. It's almost like a free monad,
31 -- but has a third constructor ('ProTokPi')
32 -- to require a type argument.
34 -- NOTE: this type may one day be removed
35 -- if proper type inferencing is done.
36 -- In the meantime it is used to require
37 -- term or type arguments needed to build
38 -- the 'EToken's of polymorphic terms.
40 = ProTokLam (EToken meta ts -> ProTok meta ts)
41 -- ^ Require a term argument.
42 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
43 -- ^ Require a type argument.
44 | ProTok (EToken meta ts)
45 -- ^ No need for any argument.
47 -- | Declared here and not in @Compiling.Lambda@
48 -- to be able to use 'Token_Term_Var' in 'protok'.
49 data instance TokenT meta (ts::[*]) (Proxy (->))
50 = Token_Term_Abst Term_Name (EToken meta '[Proxy Token_Type]) (EToken meta ts)
51 | Token_Term_App (EToken meta ts) (EToken meta ts)
52 | Token_Term_Let Term_Name (EToken meta ts) (EToken meta ts)
53 | Token_Term_Var Term_Name
54 | Token_Term_Compose (EToken meta ts) (EToken meta ts)
58 = TokenizeR meta ts ts
60 -- ** Type 'Tokenizers'
61 data Tokenizers meta ts
63 { tokenizers_prefix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
64 , tokenizers_infix :: Map Mod_Path (Map Term_Name (Term_ProTok Infix meta ts))
65 , tokenizers_postfix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
67 instance Semigroup (Tokenizers meta ts) where
70 (Map.unionWith Map.union
72 (tokenizers_prefix y))
73 (Map.unionWith Map.union
76 (Map.unionWith Map.union
77 (tokenizers_postfix x)
78 (tokenizers_postfix y))
79 instance Monoid (Tokenizers meta ts) where
80 mempty = Tokenizers Map.empty Map.empty Map.empty
83 -- ** Type 'Term_ProTok'
84 data Term_ProTok fixy meta ts
86 { term_protok :: meta -> ProTok meta ts
90 tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
91 tokenizers = tokenizeR (Proxy @ts)
95 -> Either Error_Term_Gram (EToken meta ts)
96 unProTok (ProTok t) = Right t
97 unProTok _ = Left Error_Term_Gram_Term_incomplete
100 :: Inj_Token meta ts (->)
102 -> Tokenizers meta ts
103 -> Either Error_Term_Gram
104 ( Maybe (Term_ProTok Unifix meta ts)
105 , Term_ProTok Infix meta ts
106 , Maybe (Term_ProTok Unifix meta ts)
108 protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
109 let pre = Map.lookup mod pres >>= Map.lookup tn
110 let post = Map.lookup mod posts >>= Map.lookup tn
111 in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
112 return (pre, in_, post)
114 var_or_err (Just x) = Right x
117 [] -> Right $ var infixN5
118 _ -> Left $ Error_Term_Gram_Undefined_term
121 { term_protok = \meta -> ProTok $ inj_EToken meta $ Token_Term_Var tn
126 :: Inj_Token meta ts (->)
128 -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
129 -> Either Error_Term_Gram (ProTok meta ts)
135 ProTokPi g -> Right $ g typ
136 _ -> Left Error_Term_Gram_Cannot_apply_type
139 ProTokLam f -> Right $ f te
140 ProTok tok@(EToken e) -> Right $
141 ProTok $ inj_EToken (meta_of e) $
142 Token_Term_App tok te
143 _ -> Left Error_Term_Gram_Cannot_apply_term
145 -- ** Class 'TokenizeR'
146 class TokenizeR meta (ts::[*]) (rs::[*]) where
147 tokenizeR :: Proxy rs -> Tokenizers meta ts
148 instance TokenizeR meta ts '[] where
149 tokenizeR _rs = mempty
151 ( TokenizeT meta ts t
152 , TokenizeR meta ts rs
153 ) => TokenizeR meta ts (t ': rs) where
155 tokenizeR (Proxy @rs) <>
158 -- ** Class 'TokenizeT'
159 class TokenizeT meta ts t where
160 tokenizeT :: Proxy t -> Tokenizers meta ts
161 -- tokenizeT _t = [] `Mod` []
162 tokenizeT _t = mempty
166 -> [(Term_Name, Term_ProTok fix meta ts)]
167 -> Map Mod_Path (Map Term_Name (Term_ProTok fix meta ts))
168 tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl
171 :: Inj_Token meta ts t
172 => Text -> fixity -> TokenT meta ts (Proxy t)
173 -> (Term_Name, Term_ProTok fixity meta ts)
174 tokenize0 n term_fixity tok =
175 (Term_Name n,) Term_ProTok
176 { term_protok = \meta -> ProTok $ inj_EToken meta $ tok
180 :: Inj_Token meta ts t
182 -> (EToken meta ts -> TokenT meta ts (Proxy t))
183 -> (Term_Name, Term_ProTok fixity meta ts)
184 tokenize1 n term_fixity tok =
185 (Term_Name n,) Term_ProTok
186 { term_protok = \meta ->
188 ProTok $ inj_EToken meta $ tok a
192 :: Inj_Token meta ts t
194 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
195 -> (Term_Name, Term_ProTok fixity meta ts)
196 tokenize2 n term_fixity tok =
197 (Term_Name n,) Term_ProTok
198 { term_protok = \meta ->
199 ProTokLam $ \a -> ProTokLam $ \b ->
200 ProTok $ inj_EToken meta $ tok a b
205 :: Inj_Token meta ts t
207 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
208 -> (Term_Name, Term_ProTok fixity meta ts)
209 tokenize3 n term_fixity tok =
210 (Term_Name n,) Term_ProTok
211 { term_protok = \meta ->
212 ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
213 ProTok $ inj_EToken meta $ tok a b c
218 type Mod_Path = [Mod_Name]
219 newtype Mod_Name = Mod_Name Text
220 deriving (Eq, Ord, Show)
221 data Mod a = Mod Mod_Path a
222 deriving (Eq, Functor, Ord, Show)
224 -- * Class 'Gram_Term_Name'
237 ) => Gram_Term_Name g where
238 mod_path :: CF g Mod_Path
239 mod_path = rule "mod_path" $
244 mod_name :: CF g Mod_Name
245 mod_name = rule "mod_name" $
246 (Mod_Name . Text.pack <$>) $
250 <*. (any `but` term_idname_tail))
252 identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
253 headG = unicat $ Unicat Char.UppercaseLetter
255 term_mod_name :: CF g (Mod Term_Name)
256 term_mod_name = rule "term_mod_name" $
259 parens term_mod_opname
260 term_name :: CF g Term_Name
261 term_name = rule "term_name" $
266 term_mod_idname :: CF g (Mod Term_Name)
267 term_mod_idname = rule "term_mod_idname" $
269 <$> option [] (try $ mod_path <* char '.')
271 term_idname :: CF g Term_Name
272 term_idname = rule "term_idname" $
273 (Term_Name . Text.pack <$>) $
277 <*. (any `but` term_idname_tail)
279 identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
280 headG = unicat $ Unicat_Letter
281 term_idname_tail :: Terminal g Char
282 term_idname_tail = rule "term_idname_tail" $
283 unicat Unicat_Letter <+>
285 term_keywords :: Reg rl g String
286 term_keywords = rule "term_keywords" $
287 choice $ string <$> ["in", "let"]
289 term_mod_opname :: CF g (Mod Term_Name)
290 term_mod_opname = rule "term_mod_opname" $
292 <$> option [] (try $ mod_path <* char '.')
294 term_opname :: CF g Term_Name
295 term_opname = rule "term_opname" $
296 (Term_Name . Text.pack <$>) $
300 <*. (any `but` term_opname_ok)
302 symG = some $ cf_of_Terminal term_opname_ok
303 term_opname_ok :: Terminal g Char
304 term_opname_ok = rule "term_opname_ok" $
311 koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
312 term_keysyms :: Reg rl g String
313 term_keysyms = rule "term_keysyms" $
314 choice $ string <$> ["\\", "->", "="]
316 deriving instance Gram_Term_Name g => Gram_Term_Name (CF g)
317 instance Gram_Term_Name EBNF
318 instance Gram_Term_Name RuleDef
320 -- * Class 'Gram_Term_Type'
332 ) => Gram_Term_Type meta g where
334 :: CF g (Term_Name, TokType meta)
335 term_abst_decl = rule "term_abst_decl" $
341 deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
342 instance Gram_Term_Type meta EBNF
343 instance Gram_Term_Type meta RuleDef
345 -- * Class 'Gram_Error'
346 class Gram_Error g where
347 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
348 deriving instance Gram_Error g => Gram_Error (CF g)
349 instance Gram_Error EBNF where
350 term_unError (CF (EBNF g)) = CF $ EBNF g
351 instance Gram_Error RuleDef where
352 term_unError (CF (RuleDef (EBNF g))) =
353 CF $ RuleDef $ EBNF g
355 -- ** Type 'Error_Term_Gram'
357 = Error_Term_Gram_Fixity Error_Fixity
358 | Error_Term_Gram_Cannot_apply_term
359 | Error_Term_Gram_Cannot_apply_type
360 | Error_Term_Gram_Undefined_term
361 | Error_Term_Gram_Term_incomplete
364 -- * Class 'Gram_Term'
375 , Gram_Term_AtomsR meta ts ts g
377 , Gram_Term_Type meta g
379 ) => Gram_Term ts meta g where
380 -- | Wrap 'term_abst'. Useful to modify body's scope.
382 :: CF g [(Term_Name, TokType meta)]
383 -> CF g (EToken meta ts)
384 -> CF g ([(Term_Name, TokType meta)], EToken meta ts)
385 term_abst_args_body args body = (,) <$> args <*> body
386 term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a
389 :: Inj_Tokens meta ts '[Proxy (->)]
390 => CF g (EToken meta ts)
391 termG = rule "term" $
398 :: Inj_Tokens meta ts '[Proxy (->)]
399 => CF g (EToken meta ts)
400 term_operators = rule "term_operators" $
403 left Error_Term_Gram_Fixity <$>
406 (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix)
407 (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix)
408 (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix)
410 bqG :: Gram_Terminal g' => g' Char
412 op_infix name toks meta = do
413 (_pre, in_, _post) <- protok name toks
415 (term_fixity in_,) $ \ma mb -> do
418 unProTok =<< term_protok in_ meta `protok_app` [Right a, Right b]
419 op_prefix name toks meta = do
420 (pre, _in_, _post) <- protok name toks
423 Right $ (term_fixity p,) $ (=<<) $ \a ->
424 unProTok =<< term_protok p meta `protok_app` [Right a]
425 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
426 op_postfix name toks meta = do
427 (_pre, _in_, post) <- protok name toks
430 Right $ (term_fixity p,) $ (=<<) $ \a ->
431 unProTok =<< term_protok p meta `protok_app` [Right a]
432 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
433 term_op_postfix :: CF g (Mod Term_Name)
434 term_op_postfix = rule "term_op_postfix" $
436 bqG *> term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` bqG)
438 term_op_infix :: CF g (Mod Term_Name)
439 term_op_infix = rule "term_op_infix" $
441 between bqG bqG term_mod_idname <+>
443 term_op_prefix :: CF g (Mod Term_Name)
444 term_op_prefix = rule "term_op_prefix" $
446 term_mod_idname <* bqG <+>
449 :: Inj_Tokens meta ts '[Proxy (->)]
450 => CF g (EToken meta ts)
451 term_app = rule "term_app" $
453 (\a as -> unProTok =<< protok_app a as)
455 <*> many (try term_atom)
457 :: Inj_Tokens meta ts '[Proxy (->)]
458 => CF g (Either (EToken meta '[Proxy Token_Type])
460 term_atom = rule "term_atom" $
461 (Left <$ char '@' <*> typeG) <+>
462 (Right <$> term_unError (unProTok <$> term_atom_proto))
464 :: Inj_Tokens meta ts '[Proxy (->)]
465 => CF g (ProTok meta ts)
468 try <$> term_atomsR (Proxy @ts) <>
470 metaG $ ((\(_, in_, _) -> term_protok in_) <$>) $
473 protok <$> term_mod_name
474 , ProTok <$> term_group
477 :: Inj_Tokens meta ts '[Proxy (->)]
478 => CF g (EToken meta ts)
479 term_group = rule "term_group" $ parens termG
481 :: Inj_Tokens meta ts '[Proxy (->)]
482 => CF g (EToken meta ts)
483 term_abst = rule "term_abst" $
488 Token_Term_Abst x ty_x) te xs) <$>) $
490 (symbol "\\" *> some term_abst_decl <* symbol "->")
493 :: Inj_Tokens meta ts '[Proxy (->)]
494 => CF g (EToken meta ts)
495 term_let = rule "term_let" $
497 (\name args bound body meta ->
500 (foldr (\(x, ty_x) ->
501 inj_EToken meta . Token_Term_Abst x ty_x) bound args) body)
504 <*> many term_abst_decl
511 ( Gram_Term ts meta g
512 , Gram_Term_AtomsR meta ts ts (CF g)
513 ) => Gram_Term ts meta (CF g)
515 Gram_Term_AtomsR meta ts ts EBNF =>
516 Gram_Term ts meta EBNF where
517 term_tokenizers (CF (EBNF g)) = CF $ EBNF g
519 Gram_Term_AtomsR meta ts ts RuleDef =>
520 Gram_Term ts meta RuleDef where
521 term_tokenizers (CF (RuleDef (EBNF g))) =
522 CF $ RuleDef $ EBNF g
524 -- ** Class 'Gram_Term_AtomsR'
525 class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
526 term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
527 instance Gram_Term_AtomsR meta ts '[] g where
530 ( Gram_Term_AtomsT meta ts t g
531 , Gram_Term_AtomsR meta ts rs g
532 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
534 term_atomsT (Proxy @t) <>
535 term_atomsR (Proxy @rs)
537 -- ** Class 'Gram_Term_AtomsT'
538 class Gram_Term_AtomsT meta ts t g where
539 term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
541 instance Gram_Term_AtomsT meta ts t RuleDef
545 ( Gram_Term '[Proxy (->), Proxy Integer] () g
554 , void (term_abst_decl::CF g (Term_Name, TokType ()))
559 , void $ cf_of_Terminal term_idname_tail
560 , void $ cf_of_Reg term_keywords
561 , void term_mod_opname
563 , void $ cf_of_Terminal term_opname_ok
564 , void $ cf_of_Reg term_keysyms
566 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
568 -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
570 ug :: CF g (Either (EToken () '[Proxy Token_Type])
571 (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()