]> Git — Sourcephile - haskell/symantic.git/blob - symantic/Language/Symantic/Compiling/Term/Grammar.hs
Add IsString instances.
[haskell/symantic.git] / symantic / Language / Symantic / Compiling / Term / Grammar.hs
1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE PolyKinds #-}
6 {-# LANGUAGE UndecidableInstances #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 module Language.Symantic.Compiling.Term.Grammar where
9
10 import Control.Arrow (left)
11 import Control.Monad (foldM, void, (=<<))
12 import qualified Data.Char as Char
13 import qualified Data.Function as Fun
14 import Data.Map.Strict (Map)
15 import qualified Data.Map.Strict as Map
16 import Data.Proxy (Proxy(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.String (IsString(..))
19 import Data.Text (Text)
20 import qualified Data.Text as Text
21 import Prelude hiding (mod, not, any)
22
23 import Language.Symantic.Parsing
24 import Language.Symantic.Typing
25
26 -- * Type 'Term_Name'
27 newtype Term_Name = Term_Name Text
28 deriving (Eq, Ord, Show)
29 instance IsString Term_Name where
30 fromString = Term_Name . fromString
31
32 -- * Type 'ProTok'
33 -- | Proto 'EToken'. It's almost like a free monad,
34 -- but has a third constructor ('ProTokPi')
35 -- to require a type argument.
36 --
37 -- NOTE: this type may one day be removed
38 -- if proper type inferencing is done.
39 -- In the meantime it is used to require
40 -- term or type arguments needed to build
41 -- the 'EToken's of polymorphic terms.
42 data ProTok meta ts
43 = ProTokLam (EToken meta ts -> ProTok meta ts)
44 -- ^ Require a term argument.
45 | ProTokPi (EToken meta '[Proxy Token_Type] -> ProTok meta ts)
46 -- ^ Require a type argument.
47 | ProTok (EToken meta ts)
48 -- ^ No need for any argument.
49
50 -- | Declared here and not in @Compiling.Lambda@
51 -- to be able to use 'Token_Term_Var' in 'protok'.
52 data instance TokenT meta (ts::[*]) (Proxy (->))
53 = Token_Term_Abst Term_Name (EToken meta '[Proxy Token_Type]) (EToken meta ts)
54 | Token_Term_App (EToken meta ts) (EToken meta ts)
55 | Token_Term_Let Term_Name (EToken meta ts) (EToken meta ts)
56 | Token_Term_Var Term_Name
57 | Token_Term_Compose (EToken meta ts) (EToken meta ts)
58
59 -- * Class 'Tokenize'
60 type Tokenize meta ts
61 = TokenizeR meta ts ts
62
63 -- ** Type 'Tokenizers'
64 data Tokenizers meta ts
65 = Tokenizers
66 { tokenizers_prefix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
67 , tokenizers_infix :: Map Mod_Path (Map Term_Name (Term_ProTok Infix meta ts))
68 , tokenizers_postfix :: Map Mod_Path (Map Term_Name (Term_ProTok Unifix meta ts))
69 }
70 instance Semigroup (Tokenizers meta ts) where
71 x <> y =
72 Tokenizers
73 (Map.unionWith Map.union
74 (tokenizers_prefix x)
75 (tokenizers_prefix y))
76 (Map.unionWith Map.union
77 (tokenizers_infix x)
78 (tokenizers_infix y))
79 (Map.unionWith Map.union
80 (tokenizers_postfix x)
81 (tokenizers_postfix y))
82 instance Monoid (Tokenizers meta ts) where
83 mempty = Tokenizers Map.empty Map.empty Map.empty
84 mappend = (<>)
85
86 -- ** Type 'Term_ProTok'
87 data Term_ProTok fixy meta ts
88 = Term_ProTok
89 { term_protok :: meta -> ProTok meta ts
90 , term_fixity :: fixy
91 }
92
93 tokenizers :: forall meta ts. Tokenize meta ts => Tokenizers meta ts
94 tokenizers = tokenizeR (Proxy @ts)
95
96 unProTok
97 :: ProTok meta ts
98 -> Either Error_Term_Gram (EToken meta ts)
99 unProTok (ProTok t) = Right t
100 unProTok _ = Left Error_Term_Gram_Term_incomplete
101
102 protok
103 :: Inj_Token meta ts (->)
104 => Mod Term_Name
105 -> Tokenizers meta ts
106 -> Either Error_Term_Gram
107 ( Maybe (Term_ProTok Unifix meta ts)
108 , Term_ProTok Infix meta ts
109 , Maybe (Term_ProTok Unifix meta ts)
110 )
111 protok (mod `Mod` tn) (Tokenizers pres ins posts) = do
112 let pre = Map.lookup mod pres >>= Map.lookup tn
113 let post = Map.lookup mod posts >>= Map.lookup tn
114 in_ <- var_or_err $ Map.lookup mod ins >>= Map.lookup tn
115 return (pre, in_, post)
116 where
117 var_or_err (Just x) = Right x
118 var_or_err Nothing =
119 case mod of
120 [] -> Right $ var infixN5
121 _ -> Left $ Error_Term_Gram_Undefined_term
122 var term_fixity =
123 Term_ProTok
124 { term_protok = \meta -> ProTok $ inj_EToken meta $ Token_Term_Var tn
125 , term_fixity
126 }
127
128 protok_app
129 :: Inj_Token meta ts (->)
130 => ProTok meta ts
131 -> [Either (EToken meta '[Proxy Token_Type]) (EToken meta ts)]
132 -> Either Error_Term_Gram (ProTok meta ts)
133 protok_app =
134 foldM app
135 where
136 app acc (Left typ) =
137 case acc of
138 ProTokPi g -> Right $ g typ
139 _ -> Left Error_Term_Gram_Cannot_apply_type
140 app acc (Right te) =
141 case acc of
142 ProTokLam f -> Right $ f te
143 ProTok tok@(EToken e) -> Right $
144 ProTok $ inj_EToken (meta_of e) $
145 Token_Term_App tok te
146 _ -> Left Error_Term_Gram_Cannot_apply_term
147
148 -- ** Class 'TokenizeR'
149 class TokenizeR meta (ts::[*]) (rs::[*]) where
150 tokenizeR :: Proxy rs -> Tokenizers meta ts
151 instance TokenizeR meta ts '[] where
152 tokenizeR _rs = mempty
153 instance
154 ( TokenizeT meta ts t
155 , TokenizeR meta ts rs
156 ) => TokenizeR meta ts (t ': rs) where
157 tokenizeR _ =
158 tokenizeR (Proxy @rs) <>
159 tokenizeT (Proxy @t)
160
161 -- ** Class 'TokenizeT'
162 class TokenizeT meta ts t where
163 tokenizeT :: Proxy t -> Tokenizers meta ts
164 -- tokenizeT _t = [] `Mod` []
165 tokenizeT _t = mempty
166
167 tokenizeTMod
168 :: Mod_Path
169 -> [(Term_Name, Term_ProTok fix meta ts)]
170 -> Map Mod_Path (Map Term_Name (Term_ProTok fix meta ts))
171 tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl
172
173 tokenize0
174 :: Inj_Token meta ts t
175 => Text -> fixity -> TokenT meta ts (Proxy t)
176 -> (Term_Name, Term_ProTok fixity meta ts)
177 tokenize0 n term_fixity tok =
178 (Term_Name n,) Term_ProTok
179 { term_protok = \meta -> ProTok $ inj_EToken meta $ tok
180 , term_fixity }
181
182 tokenize1
183 :: Inj_Token meta ts t
184 => Text -> fixity
185 -> (EToken meta ts -> TokenT meta ts (Proxy t))
186 -> (Term_Name, Term_ProTok fixity meta ts)
187 tokenize1 n term_fixity tok =
188 (Term_Name n,) Term_ProTok
189 { term_protok = \meta ->
190 ProTokLam $ \a ->
191 ProTok $ inj_EToken meta $ tok a
192 , term_fixity }
193
194 tokenize2
195 :: Inj_Token meta ts t
196 => Text -> fixity
197 -> (EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
198 -> (Term_Name, Term_ProTok fixity meta ts)
199 tokenize2 n term_fixity tok =
200 (Term_Name n,) Term_ProTok
201 { term_protok = \meta ->
202 ProTokLam $ \a -> ProTokLam $ \b ->
203 ProTok $ inj_EToken meta $ tok a b
204 , term_fixity
205 }
206
207 tokenize3
208 :: Inj_Token meta ts t
209 => Text -> fixity
210 -> (EToken meta ts -> EToken meta ts -> EToken meta ts -> TokenT meta ts (Proxy t))
211 -> (Term_Name, Term_ProTok fixity meta ts)
212 tokenize3 n term_fixity tok =
213 (Term_Name n,) Term_ProTok
214 { term_protok = \meta ->
215 ProTokLam $ \a -> ProTokLam $ \b -> ProTokLam $ \c ->
216 ProTok $ inj_EToken meta $ tok a b c
217 , term_fixity
218 }
219
220 -- * Type 'Mod'
221 type Mod_Path = [Mod_Name]
222 newtype Mod_Name = Mod_Name Text
223 deriving (Eq, Ord, Show)
224 data Mod a = Mod Mod_Path a
225 deriving (Eq, Functor, Ord, Show)
226
227 -- * Class 'Gram_Term_Name'
228 class
229 ( Alt g
230 , Alter g
231 , Alter g
232 , App g
233 , Try g
234 , Gram_CF g
235 , Gram_Op g
236 , Gram_Lexer g
237 , Gram_RegL g
238 , Gram_Rule g
239 , Gram_Terminal g
240 ) => Gram_Term_Name g where
241 mod_path :: CF g Mod_Path
242 mod_path = rule "mod_path" $
243 infixrG
244 (pure <$> mod_name)
245 (op <$ char '.')
246 where op = (<>)
247 mod_name :: CF g Mod_Name
248 mod_name = rule "mod_name" $
249 (Mod_Name . Text.pack <$>) $
250 identG `minus`
251 (Fun.const
252 <$> term_keywords
253 <*. (any `but` term_idname_tail))
254 where
255 identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
256 headG = unicat $ Unicat Char.UppercaseLetter
257
258 term_mod_name :: CF g (Mod Term_Name)
259 term_mod_name = rule "term_mod_name" $
260 lexeme $
261 term_mod_idname <+>
262 parens term_mod_opname
263 term_name :: CF g Term_Name
264 term_name = rule "term_name" $
265 lexeme $
266 term_idname <+>
267 parens term_opname
268
269 term_mod_idname :: CF g (Mod Term_Name)
270 term_mod_idname = rule "term_mod_idname" $
271 Mod
272 <$> option [] (try $ mod_path <* char '.')
273 <*> term_idname
274 term_idname :: CF g Term_Name
275 term_idname = rule "term_idname" $
276 (Term_Name . Text.pack <$>) $
277 (identG `minus`) $
278 Fun.const
279 <$> term_keywords
280 <*. (any `but` term_idname_tail)
281 where
282 identG = (:) <$> headG <*> many (cf_of_Terminal term_idname_tail)
283 headG = unicat $ Unicat_Letter
284 term_idname_tail :: Terminal g Char
285 term_idname_tail = rule "term_idname_tail" $
286 unicat Unicat_Letter <+>
287 unicat Unicat_Number
288 term_keywords :: Reg rl g String
289 term_keywords = rule "term_keywords" $
290 choice $ string <$> ["in", "let"]
291
292 term_mod_opname :: CF g (Mod Term_Name)
293 term_mod_opname = rule "term_mod_opname" $
294 Mod
295 <$> option [] (try $ mod_path <* char '.')
296 <*> term_opname
297 term_opname :: CF g Term_Name
298 term_opname = rule "term_opname" $
299 (Term_Name . Text.pack <$>) $
300 (symG `minus`) $
301 Fun.const
302 <$> term_keysyms
303 <*. (any `but` term_opname_ok)
304 where
305 symG = some $ cf_of_Terminal term_opname_ok
306 term_opname_ok :: Terminal g Char
307 term_opname_ok = rule "term_opname_ok" $
308 choice (unicat <$>
309 [ Unicat_Symbol
310 , Unicat_Punctuation
311 , Unicat_Mark
312 ]) `but` koG
313 where
314 koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
315 term_keysyms :: Reg rl g String
316 term_keysyms = rule "term_keysyms" $
317 choice $ string <$> ["\\", "->", "="]
318
319 deriving instance Gram_Term_Name g => Gram_Term_Name (CF g)
320 instance Gram_Term_Name EBNF
321 instance Gram_Term_Name RuleDef
322
323 -- * Class 'Gram_Term_Type'
324 class
325 ( Alt g
326 , Alter g
327 , App g
328 , Gram_CF g
329 , Gram_Lexer g
330 , Gram_Meta meta g
331 , Gram_Rule g
332 , Gram_Terminal g
333 , Gram_Term_Name g
334 , Gram_Type meta g
335 ) => Gram_Term_Type meta g where
336 term_abst_decl
337 :: CF g (Term_Name, TokType meta)
338 term_abst_decl = rule "term_abst_decl" $
339 parens $ (,)
340 <$> term_name
341 <* symbol ":"
342 <*> typeG
343
344 deriving instance Gram_Term_Type meta g => Gram_Term_Type meta (CF g)
345 instance Gram_Term_Type meta EBNF
346 instance Gram_Term_Type meta RuleDef
347
348 -- * Class 'Gram_Error'
349 class Gram_Error g where
350 term_unError :: CF g (Either Error_Term_Gram a) -> CF g a
351 deriving instance Gram_Error g => Gram_Error (CF g)
352 instance Gram_Error EBNF where
353 term_unError (CF (EBNF g)) = CF $ EBNF g
354 instance Gram_Error RuleDef where
355 term_unError (CF (RuleDef (EBNF g))) =
356 CF $ RuleDef $ EBNF g
357
358 -- ** Type 'Error_Term_Gram'
359 data Error_Term_Gram
360 = Error_Term_Gram_Fixity Error_Fixity
361 | Error_Term_Gram_Cannot_apply_term
362 | Error_Term_Gram_Cannot_apply_type
363 | Error_Term_Gram_Undefined_term
364 | Error_Term_Gram_Term_incomplete
365 deriving (Eq, Show)
366
367 -- * Class 'Gram_Term'
368 class
369 ( Alt g
370 , Alter g
371 , App g
372 , Gram_CF g
373 , Gram_Lexer g
374 , Gram_Meta meta g
375 , Gram_Rule g
376 , Gram_Terminal g
377 , Gram_Error g
378 , Gram_Term_AtomsR meta ts ts g
379 , Gram_Term_Name g
380 , Gram_Term_Type meta g
381 , Gram_Type meta g
382 ) => Gram_Term ts meta g where
383 -- | Wrap 'term_abst'. Useful to modify body's scope.
384 term_abst_args_body
385 :: CF g [(Term_Name, TokType meta)]
386 -> CF g (EToken meta ts)
387 -> CF g ([(Term_Name, TokType meta)], EToken meta ts)
388 term_abst_args_body args body = (,) <$> args <*> body
389 term_tokenizers :: CF g (Tokenizers meta ts -> a) -> CF g a
390
391 termG
392 :: Inj_Tokens meta ts '[Proxy (->)]
393 => CF g (EToken meta ts)
394 termG = rule "term" $
395 choice
396 [ try term_abst
397 , term_operators
398 , term_let
399 ]
400 term_operators
401 :: Inj_Tokens meta ts '[Proxy (->)]
402 => CF g (EToken meta ts)
403 term_operators = rule "term_operators" $
404 term_unError $
405 term_unError $
406 left Error_Term_Gram_Fixity <$>
407 operators
408 (Right <$> term_app)
409 (term_unError $ metaG $ term_tokenizers $ op_prefix <$> term_op_prefix)
410 (term_unError $ metaG $ term_tokenizers $ op_infix <$> term_op_infix)
411 (term_unError $ metaG $ term_tokenizers $ op_postfix <$> term_op_postfix)
412 where
413 bqG :: Gram_Terminal g' => g' Char
414 bqG = char '`'
415 op_infix name toks meta = do
416 (_pre, in_, _post) <- protok name toks
417 return $
418 (term_fixity in_,) $ \ma mb -> do
419 a <- ma
420 b <- mb
421 unProTok =<< term_protok in_ meta `protok_app` [Right a, Right b]
422 op_prefix name toks meta = do
423 (pre, _in_, _post) <- protok name toks
424 case pre of
425 Just p ->
426 Right $ (term_fixity p,) $ (=<<) $ \a ->
427 unProTok =<< term_protok p meta `protok_app` [Right a]
428 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
429 op_postfix name toks meta = do
430 (_pre, _in_, post) <- protok name toks
431 case post of
432 Just p ->
433 Right $ (term_fixity p,) $ (=<<) $ \a ->
434 unProTok =<< term_protok p meta `protok_app` [Right a]
435 Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
436 term_op_postfix :: CF g (Mod Term_Name)
437 term_op_postfix = rule "term_op_postfix" $
438 lexeme $
439 bqG *> term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` bqG)
440 term_mod_opname
441 term_op_infix :: CF g (Mod Term_Name)
442 term_op_infix = rule "term_op_infix" $
443 lexeme $
444 between bqG bqG term_mod_idname <+>
445 term_mod_opname
446 term_op_prefix :: CF g (Mod Term_Name)
447 term_op_prefix = rule "term_op_prefix" $
448 lexeme $
449 term_mod_idname <* bqG <+>
450 term_mod_opname
451 term_app
452 :: Inj_Tokens meta ts '[Proxy (->)]
453 => CF g (EToken meta ts)
454 term_app = rule "term_app" $
455 term_unError $
456 (\a as -> unProTok =<< protok_app a as)
457 <$> term_atom_proto
458 <*> many (try term_atom)
459 term_atom
460 :: Inj_Tokens meta ts '[Proxy (->)]
461 => CF g (Either (EToken meta '[Proxy Token_Type])
462 (EToken meta ts))
463 term_atom = rule "term_atom" $
464 (Left <$ char '@' <*> typeG) <+>
465 (Right <$> term_unError (unProTok <$> term_atom_proto))
466 term_atom_proto
467 :: Inj_Tokens meta ts '[Proxy (->)]
468 => CF g (ProTok meta ts)
469 term_atom_proto =
470 choice $
471 try <$> term_atomsR (Proxy @ts) <>
472 [ try $
473 metaG $ ((\(_, in_, _) -> term_protok in_) <$>) $
474 term_unError $
475 term_tokenizers $
476 protok <$> term_mod_name
477 , ProTok <$> term_group
478 ]
479 term_group
480 :: Inj_Tokens meta ts '[Proxy (->)]
481 => CF g (EToken meta ts)
482 term_group = rule "term_group" $ parens termG
483 term_abst
484 :: Inj_Tokens meta ts '[Proxy (->)]
485 => CF g (EToken meta ts)
486 term_abst = rule "term_abst" $
487 metaG $
488 ((\(xs, te) meta ->
489 foldr (\(x, ty_x) ->
490 inj_EToken meta .
491 Token_Term_Abst x ty_x) te xs) <$>) $
492 term_abst_args_body
493 (symbol "\\" *> some term_abst_decl <* symbol "->")
494 termG
495 term_let
496 :: Inj_Tokens meta ts '[Proxy (->)]
497 => CF g (EToken meta ts)
498 term_let = rule "term_let" $
499 metaG $
500 (\name args bound body meta ->
501 inj_EToken meta $
502 Token_Term_Let name
503 (foldr (\(x, ty_x) ->
504 inj_EToken meta . Token_Term_Abst x ty_x) bound args) body)
505 <$ symbol "let"
506 <*> term_name
507 <*> many term_abst_decl
508 <* symbol "="
509 <*> termG
510 <* symbol "in"
511 <*> termG
512
513 deriving instance
514 ( Gram_Term ts meta g
515 , Gram_Term_AtomsR meta ts ts (CF g)
516 ) => Gram_Term ts meta (CF g)
517 instance
518 Gram_Term_AtomsR meta ts ts EBNF =>
519 Gram_Term ts meta EBNF where
520 term_tokenizers (CF (EBNF g)) = CF $ EBNF g
521 instance
522 Gram_Term_AtomsR meta ts ts RuleDef =>
523 Gram_Term ts meta RuleDef where
524 term_tokenizers (CF (RuleDef (EBNF g))) =
525 CF $ RuleDef $ EBNF g
526
527 -- ** Class 'Gram_Term_AtomsR'
528 class Gram_Term_AtomsR meta (ts::[*]) (rs::[*]) g where
529 term_atomsR :: Proxy rs -> [CF g (ProTok meta ts)]
530 instance Gram_Term_AtomsR meta ts '[] g where
531 term_atomsR _rs = []
532 instance
533 ( Gram_Term_AtomsT meta ts t g
534 , Gram_Term_AtomsR meta ts rs g
535 ) => Gram_Term_AtomsR meta ts (t ': rs) g where
536 term_atomsR _ =
537 term_atomsT (Proxy @t) <>
538 term_atomsR (Proxy @rs)
539
540 -- ** Class 'Gram_Term_AtomsT'
541 class Gram_Term_AtomsT meta ts t g where
542 term_atomsT :: Proxy t -> [CF g (ProTok meta ts)]
543 term_atomsT _t = []
544 instance Gram_Term_AtomsT meta ts t RuleDef
545
546 gram_term
547 :: forall g.
548 ( Gram_Term '[Proxy (->), Proxy Integer] () g
549 ) => [CF g ()]
550 gram_term =
551 [ ue termG
552 , ue term_operators
553 , ue term_app
554 , ug term_atom
555 , ue term_group
556 , ue term_abst
557 , void (term_abst_decl::CF g (Term_Name, TokType ()))
558 , ue term_let
559 , void term_mod_name
560 , void term_name
561 , void term_idname
562 , void $ cf_of_Terminal term_idname_tail
563 , void $ cf_of_Reg term_keywords
564 , void term_mod_opname
565 , void term_opname
566 , void $ cf_of_Terminal term_opname_ok
567 , void $ cf_of_Reg term_keysyms
568 ] where
569 ue :: CF g (EToken () '[Proxy (->), Proxy Integer]) -> CF g ()
570 ue = (() <$)
571 -- uf :: CF g (ProTok () '[Proxy (->)]) -> CF g ()
572 -- uf = (() <$)
573 ug :: CF g (Either (EToken () '[Proxy Token_Type])
574 (EToken () '[Proxy (->), Proxy Integer])) -> CF g ()
575 ug = (() <$)