1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.Symantic.Compiling.Term.Grammar where
10 import Control.Arrow (left)
11 import Control.Monad (foldM, void, (=<<))
12 import qualified Data.Char as Char
13 import qualified Data.Function as Fun
14 import Data.Map.Strict (Map)
15 import qualified Data.Map.Strict as Map
16 import Data.Proxy (Proxy(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.String (IsString(..))
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Prelude hiding (mod, not, any)
23 import Language.Symantic.Parsing
24 import Language.Symantic.Typing
27 newtype TeName = TeName Text
28 deriving (Eq, Ord, Show)
29 instance IsString TeName where
30 fromString = TeName . fromString
33 -- | Proto 'EToken'. It's almost like a free monad,
34 -- but has a third constructor ('ProTokPi')
35 -- to require a type argument.
37 -- NOTE: this type may one day be removed
38 -- if proper type inferencing is done.
39 -- In the meantime it is used to require
40 -- term or type arguments needed to build
41 -- the 'EToken's of polymorphic terms.
43 = ProTokLam (EToken meta ts -> ProTok meta ts)
44 -- ^ Require a term argument.
45 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
46 -- ^ Require a type argument.
47 | ProTok (EToken meta ts)
48 -- ^ No need for any argument.
50 -- | Declared here and not in @Compiling.Lambda@
51 -- to be able to use 'Token_Term_Var' in 'protok'.
52 data instance TokenT meta (ts::[*]) (Proxy (->))
53 = Token_Term_Abst TeName (EToken meta '[Proxy Token_Type]) (EToken meta ts)
54 | Token_Term_App (EToken meta ts) (EToken meta ts)
55 | Token_Term_Let TeName (EToken meta ts) (EToken meta ts)
56 | Token_Term_Var TeName
57 | Token_Term_Compose (EToken meta ts) (EToken meta ts)
61 = TokenizeR meta ts ts
63 -- ** Type 'Tokenizers'
64 data Tokenizers meta ts
66 { tokenizers_prefix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
67 , tokenizers_infix :: Map Mod_Path (Map TeName (ProTok_Term Infix meta ts))
68 , tokenizers_postfix :: Map Mod_Path (Map TeName (ProTok_Term Unifix meta ts))
70 instance Semigroup (Tokenizers meta ts) where
73 (Map.unionWith Map.union
75 (tokenizers_prefix y))
76 (Map.unionWith Map.union
79 (Map.unionWith Map.union
80 (tokenizers_postfix x)
81 (tokenizers_postfix y))
82 instance Monoid (Tokenizers meta ts) where
83 mempty = Tokenizers Map.empty Map.empty Map.empty
86 -- ** Type 'ProTok_Term'
87 data ProTok_Term fixy meta ts
89 { protok_term :: meta -> ProTok meta ts
90 , protok_fixity :: fixy
93 tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
94 tokenizers = tokenizeR (Proxy @ts)
98 -> Either Error_Term_Gram (EToken meta ts)
99 unProTok (ProTok t) = Right t
100 unProTok _ = Left Error_Term_Gram_Term_incomplete
103 :: Inj_Token meta ts (->)
105 -> Tokenizers meta ts
106 -> Either Error_Term_Gram
107 ( Maybe (ProTok_Term Unifix meta ts)
108 , ProTok_Term Infix meta ts
109 , Maybe (ProTok_Term Unifix meta ts)
111 protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
112 let pre = Map.lookup mod pres >>= Map.lookup tn
113 let post = Map.lookup mod posts >>= Map.lookup tn
114 in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
115 return (pre, in_, post)
117 var_or_err (Just x) = Right x
120 [] -> Right $ var infixN5
121 _ -> Left $ Error_Term_Gram_Undefined_term
124 { protok_term = \meta -> ProTok $ inj_EToken meta $ Token_Term_Var tn
129 :: Inj_Token meta ts (->)
131 -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
132 -> Either Error_Term_Gram (ProTok meta ts)
138 ProTokPi g -> Right $ g typ
139 _ -> Left Error_Term_Gram_Cannot_apply_type
142 ProTokLam f -> Right $ f te
143 ProTok tok@(EToken e) -> Right $
144 ProTok $ inj_EToken (meta_of e) $
145 Token_Term_App tok te
146 _ -> Left Error_Term_Gram_Cannot_apply_term
148 -- ** Class 'TokenizeR'
149 class TokenizeR meta (ts::[*]) (rs::[*]) where
150 tokenizeR :: Proxy rs -> Tokenizers meta ts
151 instance TokenizeR meta ts '[] where
152 tokenizeR _rs = mempty
154 ( TokenizeT meta ts t
155 , TokenizeR meta ts rs
156 ) => TokenizeR meta ts (t ': rs) where
158 tokenizeR (Proxy @rs) <>
161 -- ** Class 'TokenizeT'
162 class TokenizeT meta ts t where
163 tokenizeT :: Proxy t -> Tokenizers meta ts
164 -- tokenizeT _t = [] `Mod` []
165 tokenizeT _t = mempty
169 -> [(TeName, ProTok_Term fix meta ts)]
170 -> Map Mod_Path (Map TeName (ProTok_Term fix meta ts))
171 tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl
174 :: Inj_Token meta ts t
175 => Text -> fixity -> TokenT meta ts (Proxy t)
176 -> (TeName, ProTok_Term fixity meta ts)
177 tokenize0 n protok_fixity tok =
178 (TeName n,) ProTok_Term
179 { protok_term = \meta -> ProTok $ inj_EToken meta $ tok
183 :: Inj_Token meta ts t
185 -> (EToken meta ts -> TokenT meta ts (Proxy t))
186 -> (TeName, ProTok_Term fixity meta ts)
187 tokenize1 n protok_fixity tok =
188 (TeName n,) ProTok_Term
189 { protok_term = \meta ->
191 ProTok $ inj_EToken meta $ tok a
195 :: Inj_Token meta ts t
197 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
198 -> (TeName, ProTok_Term fixity meta ts)
199 tokenize2 n protok_fixity tok =
200 (TeName n,) ProTok_Term
201 { protok_term = \meta ->
202 ProTokLam $ \a -> ProTokLam $ \b ->
203 ProTok $ inj_EToken meta $ tok a b
208 :: Inj_Token meta ts t
210 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
211 -> (TeName, ProTok_Term fixity meta ts)
212 tokenize3 n protok_fixity tok =
213 (TeName n,) ProTok_Term
214 { protok_term = \meta ->
215 ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
216 ProTok $ inj_EToken meta $ tok a b c
221 type Mod_Path = [Mod_Name]
222 newtype Mod_Name = Mod_Name Text
223 deriving (Eq, Ord, Show)
224 data Mod a = Mod Mod_Path a
225 deriving (Eq, Functor, Ord, Show)
227 -- * Class 'Gram_TeName'
240 ) => Gram_TeName g where
241 mod_path :: CF g Mod_Path
242 mod_path = rule "mod_path" $
247 mod_name :: CF g Mod_Name
248 mod_name = rule "mod_name" $
249 (Mod_Name . Text.pack <$>) $
253 <*. (any `but` term_idname_tail))
255 identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
256 headG = unicat $ Unicat Char.UppercaseLetter
258 term_mod_name :: CF g (Mod TeName)
259 term_mod_name = rule "term_mod_name" $
262 parens term_mod_opname
263 term_name :: CF g TeName
264 term_name = rule "term_name" $
269 term_mod_idname :: CF g (Mod TeName)
270 term_mod_idname = rule "term_mod_idname" $
272 <$> option [] (try $ mod_path <* char '.')
274 term_idname :: CF g TeName
275 term_idname = rule "term_idname" $
276 (TeName . Text.pack <$>) $
280 <*. (any `but` term_idname_tail)
282 identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
283 headG = unicat $ Unicat_Letter
284 term_idname_tail :: Terminal g Char
285 term_idname_tail = rule "term_idname_tail" $
286 unicat Unicat_Letter <+>
288 term_keywords :: Reg rl g String
289 term_keywords = rule "term_keywords" $
290 choice $ string <$> ["in", "let"]
292 term_mod_opname :: CF g (Mod TeName)
293 term_mod_opname = rule "term_mod_opname" $
295 <$> option [] (try $ mod_path <* char '.')
297 term_opname :: CF g TeName
298 term_opname = rule "term_opname" $
299 (TeName . Text.pack <$>) $
303 <*. (any `but` term_opname_ok)
305 symG = some $ cf_of_Terminal term_opname_ok
306 term_opname_ok :: Terminal g Char
307 term_opname_ok = rule "term_opname_ok" $
314 koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
315 term_keysyms :: Reg rl g String
316 term_keysyms = rule "term_keysyms" $
317 choice $ string <$> ["\\", "->", "="]
319 deriving instance Gram_TeName g => Gram_TeName (CF g)
320 instance Gram_TeName EBNF
321 instance Gram_TeName RuleDef
323 -- * Class 'Gram_Term_Type'
335 ) => Gram_Term_Type meta g where
337 :: CF g (TeName, TokType meta)
338 term_abst_decl = rule "term_abst_decl" $
344 deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
345 instance Gram_Term_Type meta EBNF
346 instance Gram_Term_Type meta RuleDef
348 -- * Class 'Gram_Error'
349 class Gram_Error g where
350 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
351 deriving instance Gram_Error g => Gram_Error (CF g)
352 instance Gram_Error EBNF where
353 term_unError (CF (EBNF g)) = CF $ EBNF g
354 instance Gram_Error RuleDef where
355 term_unError (CF (RuleDef (EBNF g))) =
356 CF $ RuleDef $ EBNF g
358 -- ** Type 'Error_Term_Gram'
360 = Error_Term_Gram_Fixity Error_Fixity
361 | Error_Term_Gram_Cannot_apply_term
362 | Error_Term_Gram_Cannot_apply_type
363 | Error_Term_Gram_Undefined_term
364 | Error_Term_Gram_Term_incomplete
367 -- * Class 'Gram_Term'
378 , Gram_Term_AtomsR meta ts ts g
380 , Gram_Term_Type meta g
382 ) => Gram_Term ts meta g where
383 -- | Wrap 'term_abst'. Useful to modify body's scope.
385 :: CF g [(TeName, TokType meta)]
386 -> CF g (EToken meta ts)
387 -> CF g ([(TeName, TokType meta)], EToken meta ts)
388 term_abst_args_body args body = (,) <$> args <*> body
389 term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a
392 :: Inj_Tokens meta ts '[Proxy (->)]
393 => CF g (EToken meta ts)
394 termG = rule "term" $
401 :: Inj_Tokens meta ts '[Proxy (->)]
402 => CF g (EToken meta ts)
403 term_operators = rule "term_operators" $
406 left Error_Term_Gram_Fixity <$>
409 (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix)
410 (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix)
411 (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix)
413 bqG :: Gram_Terminal g' => g' Char
415 op_infix name toks meta = do
416 (_pre, in_, _post) <- protok name toks
418 (protok_fixity in_,) $ \ma mb -> do
421 unProTok =<< protok_term in_ meta `protok_app` [Right a, Right b]
422 op_prefix name toks meta = do
423 (pre, _in_, _post) <- protok name toks
426 Right $ (protok_fixity p,) $ (=<<) $ \a ->
427 unProTok =<< protok_term p meta `protok_app` [Right a]
428 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
429 op_postfix name toks meta = do
430 (_pre, _in_, post) <- protok name toks
433 Right $ (protok_fixity p,) $ (=<<) $ \a ->
434 unProTok =<< protok_term p meta `protok_app` [Right a]
435 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
436 term_op_postfix :: CF g (Mod TeName)
437 term_op_postfix = rule "term_op_postfix" $
439 bqG *> term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` bqG)
441 term_op_infix :: CF g (Mod TeName)
442 term_op_infix = rule "term_op_infix" $
444 between bqG bqG term_mod_idname <+>
446 term_op_prefix :: CF g (Mod TeName)
447 term_op_prefix = rule "term_op_prefix" $
449 term_mod_idname <* bqG <+>
452 :: Inj_Tokens meta ts '[Proxy (->)]
453 => CF g (EToken meta ts)
454 term_app = rule "term_app" $
456 (\a as -> unProTok =<< protok_app a as)
458 <*> many (try term_atom)
460 :: Inj_Tokens meta ts '[Proxy (->)]
461 => CF g (Either (EToken meta '[Proxy Token_Type])
463 term_atom = rule "term_atom" $
464 (Left <$ char '@' <*> typeG) <+>
465 (Right <$> term_unError (unProTok <$> term_atom_proto))
467 :: Inj_Tokens meta ts '[Proxy (->)]
468 => CF g (ProTok meta ts)
471 try <$> term_atomsR (Proxy @ts) <>
473 metaG $ ((\(_, in_, _) -> protok_term in_) <$>) $
476 protok <$> term_mod_name
477 , ProTok <$> term_group
480 :: Inj_Tokens meta ts '[Proxy (->)]
481 => CF g (EToken meta ts)
482 term_group = rule "term_group" $ parens termG
484 :: Inj_Tokens meta ts '[Proxy (->)]
485 => CF g (EToken meta ts)
486 term_abst = rule "term_abst" $
491 Token_Term_Abst x ty_x) te xs) <$>) $
493 (symbol "\\" *> some term_abst_decl <* symbol "->")
496 :: Inj_Tokens meta ts '[Proxy (->)]
497 => CF g (EToken meta ts)
498 term_let = rule "term_let" $
500 (\name args bound body meta ->
503 (foldr (\(x, ty_x) ->
504 inj_EToken meta . Token_Term_Abst x ty_x) bound args) body)
507 <*> many term_abst_decl
514 ( Gram_Term ts meta g
515 , Gram_Term_AtomsR meta ts ts (CF g)
516 ) => Gram_Term ts meta (CF g)
518 Gram_Term_AtomsR meta ts ts EBNF =>
519 Gram_Term ts meta EBNF where
520 term_tokenizers (CF (EBNF g)) = CF $ EBNF g
522 Gram_Term_AtomsR meta ts ts RuleDef =>
523 Gram_Term ts meta RuleDef where
524 term_tokenizers (CF (RuleDef (EBNF g))) =
525 CF $ RuleDef $ EBNF g
527 -- ** Class 'Gram_Term_AtomsR'
528 class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
529 term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
530 instance Gram_Term_AtomsR meta ts '[] g where
533 ( Gram_Term_AtomsT meta ts t g
534 , Gram_Term_AtomsR meta ts rs g
535 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
537 term_atomsT (Proxy @t) <>
538 term_atomsR (Proxy @rs)
540 -- ** Class 'Gram_Term_AtomsT'
541 class Gram_Term_AtomsT meta ts t g where
542 term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
544 instance Gram_Term_AtomsT meta ts t RuleDef
548 ( Gram_Term '[Proxy (->), Proxy Integer] () g
557 , void (term_abst_decl::CF g (TeName, TokType ()))
562 , void $ cf_of_Terminal term_idname_tail
563 , void $ cf_of_Reg term_keywords
564 , void term_mod_opname
566 , void $ cf_of_Terminal term_opname_ok
567 , void $ cf_of_Reg term_keysyms
569 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
571 -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
573 ug :: CF g (Either (EToken () '[Proxy Token_Type])
574 (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()