{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.Symantic.Compiling.Term.Grammar where
 
import Control.Arrow (left)
import Control.Monad (void)
import Data.Map.Strict (Map)
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.Text (Text)
import Prelude hiding (mod, not, any)
import qualified Data.Char as Char
import qualified Data.Function as Fun
import qualified Data.Map.Strict as Map
import qualified Data.Text as Text

import Language.Symantic.Parsing
import Language.Symantic.Typing
 
type Mod_Path = [Mod_Name]
newtype Mod_Name = Mod_Name Text
 deriving (Eq, Ord, Show)
data Mod a = Mod Mod_Path a
 deriving (Eq, Functor, Ord, Show)
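
-- For instance, a qualified name such as Foo.Bar.baz can be represented
-- (a sketch, spelling out the 'Text' values with 'Text.pack') as:
--
-- > [Mod_Name (Text.pack "Foo"), Mod_Name (Text.pack "Bar")]
-- >  `Mod` TeName (Text.pack "baz")
--
-- while an unqualified name uses the empty 'Mod_Path' @[]@.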
 
-- type AST_Term src ss = BinTree (EToken src ss)
type AST_Term src ss = BinTree (Token_Term src ss)
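
-- An 'AST_Term' is thus a binary tree of tokens, with 'BinTree1' encoding
-- application. For instance, the application of f to x would be built
-- (a sketch, assuming some @src@ values @sf@ and @sx@ are at hand) as:
--
-- > BinTree1 (BinTree0 (Token_Term_Var sf (TeName (Text.pack "f"))))
-- >          (BinTree0 (Token_Term_Var sx (TeName (Text.pack "x"))))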
 
-- ** Type 'Token_Term'
data Token_Term src ss
 =   Token_Term (EToken src ss)
 |   Token_Term_Abst src TeName (AST_Type src) (AST_Term src ss)
 |   Token_Term_App  src -- (AST_Term src ss) (AST_Term src ss)
 |   Token_Term_Var  src TeName
 |   Token_Term_Let  src TeName (AST_Term src ss) (AST_Term src ss)
 |   Token_Term_Type (AST_Type src)
 
instance Eq_Token ss => Eq (Token_Term src ss) where
        Token_Term x == Token_Term y = x == y
        (==) (Token_Term_Abst _ nx vx bx)
             (Token_Term_Abst _ ny vy by) =
                nx == ny && vx == vy && bx == by
        (==) (Token_Term_App _)
             (Token_Term_App _) = True
        (==) (Token_Term_Var _ nx)
             (Token_Term_Var _ ny) = nx == ny
        (==) (Token_Term_Let _ nx vx bx)
             (Token_Term_Let _ ny vy by) =
                nx == ny && vx == vy && bx == by
        (==) (Token_Term_Type x)
             (Token_Term_Type y) = x == y
        (==) _ _ = False
 
instance Show_Token ss => Show (Token_Term src ss) where
        showsPrec p (Token_Term t) = showsPrec p t
        showsPrec _p (Token_Term_App _) = showString "Token_Term_App"
        showsPrec p (Token_Term_Abst _ n v b) =
                showParen (p >= 10) $
                showString "Token_Term_Abst" .
                showChar ' ' . showsPrec 11 n .
                showChar ' ' . showsPrec 11 v .
                showChar ' ' . showsPrec 11 b
        showsPrec p (Token_Term_Var _ n) =
                showParen (p >= 10) $
                showString "Token_Term_Var" .
                showChar ' ' . showsPrec 11 n
        showsPrec p (Token_Term_Let _ n v b) =
                showParen (p >= 10) $
                showString "Token_Term_Let" .
                showChar ' ' . showsPrec 11 n .
                showChar ' ' . showsPrec 11 v .
                showChar ' ' . showsPrec 11 b
        showsPrec p (Token_Term_Type t) =
                showParen (p >= 10) $
                showString "Token_Term_Type" .
                showChar ' ' . showsPrec 11 t
 
-- ** Type 'Tokenizer'
data Tokenizer fixy src ss
 =   Tokenizer
 {   tokenizer        :: src -> Token_Term src ss
 ,   tokenizer_fixity :: fixy
 }
 
-- ** Type 'Tokenizers'
data Tokenizers src ss
 =   Tokenizers
 {   tokenizers_prefix  :: Map Mod_Path (Map TeName (Tokenizer Unifix src ss))
 ,   tokenizers_infix   :: Map Mod_Path (Map TeName (Tokenizer Infix  src ss))
 ,   tokenizers_postfix :: Map Mod_Path (Map TeName (Tokenizer Unifix src ss))
 }
deriving instance
 ( Show (Tokenizer Unifix src ss)
 , Show (Tokenizer Infix  src ss)
 ) => Show (Tokenizers src ss)
 
instance Semigroup (Tokenizers src ss) where
        x <> y =
                Tokenizers
                 (Map.unionWith Map.union
                         (tokenizers_prefix x)
                         (tokenizers_prefix y))
                 (Map.unionWith Map.union
                         (tokenizers_infix x)
                         (tokenizers_infix y))
                 (Map.unionWith Map.union
                         (tokenizers_postfix x)
                         (tokenizers_postfix y))
instance Monoid (Tokenizers src ss) where
        mempty  = Tokenizers Map.empty Map.empty Map.empty
        mappend = (<>)
 
-- | Convenient alias for the constraint of 'tokenizers'.
type Tokenize src ss = TokenizeR src ss ss

tokenizers :: forall src ss. Tokenize src ss => Tokenizers src ss
tokenizers = tokenizeR (Proxy @ss)
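
-- The 'Tokenizers' gathered from the @ss@ list can then be extended or
-- overridden by merging, e.g. (a sketch, @my_extra_tokenizers@ being a
-- hypothetical value):
--
-- > tokenizers @src @ss <> my_extra_tokenizers
--
-- 'Map.unionWith' 'Map.union' keeps the left operand's entry on name clashes.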
 
-- | Lookup the given 'Mod' 'TeName' in the given 'Tokenizers',
-- returning the matching 'Tokenizer', if any, for each of the
-- prefix, infix and postfix positions.
tokenizer_lookup
 :: forall src ss.
    Mod TeName
 -> Tokenizers src ss
 -> ( Maybe (Tokenizer Unifix src ss)
    , Maybe (Tokenizer Infix  src ss)
    , Maybe (Tokenizer Unifix src ss)
    )
tokenizer_lookup mn@(mod `Mod` n) (Tokenizers pres ins posts) = do
        let pre  = Map.lookup mod pres  >>= Map.lookup n
        let post = Map.lookup mod posts >>= Map.lookup n
        let in_ =
                case mn of
                 -- The unqualified name made of a single space
                 -- denotes juxtaposition, i.e. application.
                 Mod [] (TeName sp) | sp == Text.pack " " ->
                        Just Tokenizer
                         { tokenizer = Token_Term_App @src @ss
                         , tokenizer_fixity = Infix (Just AssocL) 9
                         }
                 _ -> Map.lookup mod ins >>= Map.lookup n
        (pre, in_, post)
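
-- For instance, looking up an infix operator and reporting a fixity error
-- when it is absent could look like this (a sketch, where
-- @toks :: Tokenizers src ss@ is assumed to be at hand):
--
-- > case tokenizer_lookup ([] `Mod` TeName (Text.pack "+")) toks of
-- >  (_pre, Just t, _post) -> Right (tokenizer_fixity t)
-- >  _ -> Left Error_Fixity_NeedInfix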
 
-- ** Class 'TokenizeR'
class TokenizeR src (ss::[*]) (rs::[*]) where
        tokenizeR :: Proxy rs -> Tokenizers src ss
instance TokenizeR src ss '[] where
        tokenizeR _rs = mempty
instance
 ( TokenizeT src ss s
 , TokenizeR src ss rs
 ) => TokenizeR src ss (Proxy s ': rs) where
        tokenizeR _rs =
                tokenizeR (Proxy @rs) <>
                tokenizeT (Proxy @s)
 
-- ** Class 'TokenizeT'
class TokenizeT src ss s where
        tokenizeT :: Proxy s -> Tokenizers src ss
        tokenizeT _t = mempty
 
tokenizeTMod
 :: Mod_Path
 -> [(TeName, Tokenizer fix src ss)]
 -> Map Mod_Path (Map TeName (Tokenizer fix src ss))
tokenizeTMod mod tbl = Map.singleton mod $ Map.fromList tbl
 
token
 :: Inj_Token src ss s
 => Text -> fixity -> TokenT ss (Proxy s)
 -> (TeName, Tokenizer fixity src ss)
token n tokenizer_fixity tok =
        (TeName n,) Tokenizer
         { tokenizer = \src -> Token_Term $ inj_EToken src tok
         , tokenizer_fixity = tokenizer_fixity
         }
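
-- A 'TokenizeT' instance then typically builds its 'Tokenizers' from such
-- tables. A minimal sketch, constraints elided (@MyTy@ and @tok_add@ are
-- hypothetical, standing for a symantic type and the 'TokenT' value injected
-- for its addition operator):
--
-- > instance TokenizeT src ss MyTy where
-- >     tokenizeT _t = mempty
-- >      { tokenizers_infix = tokenizeTMod []
-- >         [ token (Text.pack "+") (Infix (Just AssocL) 6) tok_add ]
-- >      }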
 
-- * Class 'Gram_Name'
 ) => Gram_Name g where
        g_mod_path :: CF g Mod_Path
        g_mod_path = rule "mod_path" $
                 (pure <$> g_mod_name)
        g_mod_name :: CF g Mod_Name
        g_mod_name = rule "mod_name" $
                (Mod_Name . Text.pack <$>) $
                 <*. (any `but` g_term_idname_tail)
                where
                identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
                headG  = unicat $ Unicat Char.UppercaseLetter
 
        g_term_mod_name :: CF g (Mod TeName)
        g_term_mod_name = rule "term_mod_name" $
                        g_term_mod_idname <+>
                        parens g_term_mod_opname
        g_term_name :: CF g TeName
        g_term_name = rule "term_name" $
                g_term_idname <+>
                parens g_term_opname
        g_term_mod_idname :: CF g (Mod TeName)
        g_term_mod_idname = rule "term_mod_idname" $
                Mod
                 <$> option [] (try $ g_mod_path <* char '.')
                 <*> g_term_idname
        g_term_idname :: CF g TeName
        g_term_idname = rule "term_idname" $
                (TeName . Text.pack <$>) $
                 <*. (any `but` g_term_idname_tail)
                where
                identG = (:) <$> headG <*> many (cf_of_Terminal g_term_idname_tail)
                headG  = unicat $ Unicat_Letter
        g_term_idname_tail :: Terminal g Char
        g_term_idname_tail = rule "term_idname_tail" $
                unicat Unicat_Letter <+>
                unicat Unicat_Number
 
        g_term_keywords :: Reg rl g String
        g_term_keywords = rule "term_keywords" $
                choice $ string <$> ["in", "let"]
        g_term_mod_opname :: CF g (Mod TeName)
        g_term_mod_opname = rule "term_mod_opname" $
                Mod
                 <$> option [] (try $ g_mod_path <* char '.')
                 <*> g_term_opname
        g_term_opname :: CF g TeName
        g_term_opname = rule "term_opname" $
                (TeName . Text.pack <$>) $
                 <*. (any `but` g_term_opname_ok)
                where
                symG = some $ cf_of_Terminal g_term_opname_ok
        g_term_opname_ok :: Terminal g Char
        g_term_opname_ok = rule "term_opname_ok" $
                koG = choice (char <$> ['(', ')', '`', '\'', ',', '[', ']'])
        g_term_keysyms :: Reg rl g String
        g_term_keysyms = rule "term_keysyms" $
                choice $ string <$> ["\\", "->", "=", "@"]
 
deriving instance Gram_Name g => Gram_Name (CF g)
instance Gram_Name EBNF
instance Gram_Name RuleDef
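
-- As a rough illustration of the surface syntax these rules accept
-- (the inputs are hypothetical):
--
-- > Foo.Bar.baz   -- a module-qualified identifier ('g_term_mod_idname')
-- > (==)          -- an operator name, under 'parens' ('g_term_mod_opname')
--
-- while the keywords "in"/"let" ('g_term_keywords') and the key-symbols
-- "\\", "->", "=", "@" ('g_term_keysyms') are reserved.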
 
-- * Class 'Gram_Term_Type'
 ) => Gram_Term_Type src g where
        g_term_abst_decl
         :: CF g (TeName, AST_Type src)
        g_term_abst_decl = rule "term_abst_decl" $
deriving instance Gram_Term_Type src g => Gram_Term_Type src (CF g)
instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src EBNF
instance Inj_Source (Text_of_Source src) src => Gram_Term_Type src RuleDef
 
-- * Class 'Gram_Error'
class Gram_Error g where
        errorG :: CF g (Either Error_Term_Gram a) -> CF g a
deriving instance Gram_Error g => Gram_Error (CF g)
instance Gram_Error EBNF where
        errorG (CF (EBNF g)) = CF $ EBNF g
instance Gram_Error RuleDef where
        errorG (CF (RuleDef (EBNF g))) =
                CF $ RuleDef $ EBNF g
 
-- ** Type 'Error_Term_Gram'
data Error_Term_Gram
 =   Error_Term_Gram_Fixity Error_Fixity
 |   Error_Term_Gram_Term_incomplete
 |   Error_Term_Gram_Type_applied_to_nothing
 |   Error_Term_Gram_not_applicable
 |   Error_Term_Gram_application
 |   Error_Term_Gram_application_mismatch
 deriving (Eq, Show)
 
-- * Class 'Gram_Term'
 , Gram_Term_AtomsR src ss ss g
 , Gram_Term_Type src g
 ) => Gram_Term ss src g where
        tokenizers_get :: CF g (Tokenizers src ss -> a) -> CF g a
        tokenizers_put :: CF g (Tokenizers src ss, a) -> CF g a
         => CF g (AST_Term src ss)
        g_term = rule "term" $
 
         => CF g (AST_Term src ss)
        g_term_operators = rule "term_operators" $
                errorG $
                left Error_Term_Gram_Fixity <$>
                g_ops
                where
                g_ops :: CF g (Either Error_Fixity (AST_Term src ss))
                g_ops = operators g_term_atom g_prefix g_infix g_postfix
                g_prefix  :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
                g_infix   :: CF g (Infix,  AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
                g_postfix :: CF g (Unifix, AST_Term src ss -> AST_Term src ss)
                g_prefix  = errorG $ metaG $ tokenizers_get $ op_prefix  <$> g_prefix_op
                g_infix   = errorG $ metaG $ tokenizers_get $ op_infix   <$> g_infix_op
                g_postfix = errorG $ metaG $ tokenizers_get $ op_postfix <$> g_postfix_op
 
                op_infix
                 :: Mod TeName
                 -> Tokenizers src ss
                 -> src
                 -> Either Error_Term_Gram
                           (Infix, AST_Term src ss -> AST_Term src ss -> AST_Term src ss)
                op_infix name toks src = do
                        let (_pre, in_, _post) = tokenizer_lookup name toks
                        case in_ of
                         Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
                         Just p ->
                                Right $ (tokenizer_fixity p,) $ \a b ->
                                        (BinTree0 (tokenizer p src) `BinTree1` a) `BinTree1` b
                op_prefix, op_postfix
                 :: Mod TeName
                 -> Tokenizers src ss
                 -> src
                 -> Either Error_Term_Gram
                           ( Unifix
                           , AST_Term src ss -> AST_Term src ss )
                op_prefix name toks src = do
                        let (pre, _in_, _post) = tokenizer_lookup name toks
                        case pre of
                         Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPrefix
                         Just p ->
                                Right $ (tokenizer_fixity p,) $ \a ->
                                        BinTree0 (tokenizer p src) `BinTree1` a
                op_postfix name toks src = do
                        let (_pre, _in_, post) = tokenizer_lookup name toks
                        case post of
                         Nothing -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedPostfix
                         Just p ->
                                Right $ (tokenizer_fixity p,) $ \a ->
                                        BinTree0 (tokenizer p src) `BinTree1` a
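
                -- For instance, an infix occurrence of @+@ between @a@ and @b@
                -- is rebuilt as the operator token applied to its left then
                -- right operand:
                --
                -- > (BinTree0 tok_plus `BinTree1` a) `BinTree1` b
                --
                -- where @tok_plus@ stands for the looked-up 'Tokenizer'
                -- applied to the source location.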
 
                g_postfix_op :: CF g (Mod TeName)
                g_postfix_op = rule "term_op_postfix" $
                                g_backquote *> g_term_mod_idname <+> -- <* (cf_of_Terminal $ Gram.Term (pure ' ') `but` g_backquote)
                g_infix_op :: CF g (Mod TeName)
                g_infix_op = rule "term_op_infix" $
                                between g_backquote g_backquote g_term_mod_idname <+>
                                try (Fun.const <$> g_term_mod_opname <*> (string " " <+> string "\n")) <+>
                g_prefix_op :: CF g (Mod TeName)
                g_prefix_op = rule "term_op_prefix" $
                                g_term_mod_idname <* g_backquote <+>
                g_backquote :: Gram_Terminal g' => g' Char
                g_backquote = char '`'
 
         => CF g (AST_Term src ss)
        g_term_atom = rule "term_atom" $
                        (\typ src -> BinTree0 $ inj_EToken src $ Token_Term_Type typ)
                         <$ char '@' <*> g_type) :) $
                 (try <$> gs_term_atomsR (Proxy @ss)) <>
                        errorG $ metaG $ tokenizers_get $
                                let (_, in_, _) = tokenizer_lookup mn toks
                                case in_ of
                                 Just p  -> Right $ BinTree0 $ tokenizer p src
                                 Nothing ->
                                        case mn of
                                         [] `Mod` n -> Right $ BinTree0 $ Token_Term_Var src n
                                         _ -> Left $ Error_Term_Gram_Fixity Error_Fixity_NeedInfix
                         ) <$> g_term_mod_name
 
         => CF g (AST_Term src ss)
        g_term_group = rule "term_group" $ parens g_term
 
         => CF g (AST_Term src ss)
        g_term_abst = rule "term_abst" $
                                BinTree0 . Token_Term_Abst src x ty_x) te xs) <$>) $
                g_term_abst_args_body
                 (symbol "\\" *> some g_term_abst_decl <* symbol "->")
                 g_term
 
        g_term_abst_args_body
         :: CF g [(TeName, AST_Type src)]
         -> CF g (AST_Term src ss)
         -> CF g ([(TeName, AST_Type src)], AST_Term src ss)
        -- g_term_abst_args_body args body = (,) <$> args <*> body
        g_term_abst_args_body cf_args cf_body =
                tokenizers_put $ tokenizers_get $
                (\a b (toks::Tokenizers src ss) -> (toks, (a, b)))
                 <$> (tokenizers_put $ tokenizers_get $
                        (\args (toks::Tokenizers src ss) -> (,args)
                                toks
                                 { tokenizers_prefix  = del (tokenizers_prefix  toks) args
                                 , tokenizers_infix   = ins (tokenizers_infix   toks) args
                                 , tokenizers_postfix = del (tokenizers_postfix toks) args
                                 }) <$> cf_args)
                 <*> cf_body
                where
                del = foldr $ \(n, _) -> Map.adjust (Map.delete n) []
                ins = foldr $ \(n, _) ->
                        Map.insertWith (<>) [] $
                        Map.singleton n
                        Tokenizer
                         { tokenizer        = \src -> Token_Term_Var src n
                         , tokenizer_fixity = infixN5
                         }
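
                -- Thus, while parsing the body of an abstraction, each bound
                -- name is temporarily registered (under the empty 'Mod_Path')
                -- as an infix 'Tokenizer' producing 'Token_Term_Var', while
                -- any prefix or postfix tokenizer of the same name is removed.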
 
         => CF g (AST_Term src ss)
        g_term_let = rule "term_let" $
                metaG $
                (\name args bound body src ->
                        BinTree0 $
                        Token_Term_Let src name
                         (foldr (\(x, ty_x) ->
                                BinTree0 . Token_Term_Abst src x ty_x) bound args) body)
                 <$ symbol "let"
                 <*> g_term_name
                 <*> many g_term_abst_decl
 
deriving instance
 ( Gram_Term ss src g
 , Gram_Term_AtomsR src ss ss (CF g)
 ) => Gram_Term ss src (CF g)
instance
 ( Gram_Term_AtomsR src ss ss EBNF
 , Inj_Source (Text_of_Source src) src
 ) => Gram_Term ss src EBNF where
        tokenizers_get (CF (EBNF g)) = CF $ EBNF g
        tokenizers_put (CF (EBNF g)) = CF $ EBNF g
instance
 ( Gram_Term_AtomsR src ss ss RuleDef
 , Inj_Source (Text_of_Source src) src
 ) => Gram_Term ss src RuleDef where
        tokenizers_get (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
        tokenizers_put (CF (RuleDef (EBNF g))) = CF $ RuleDef $ EBNF g
 
-- ** Class 'Gram_Term_AtomsR'
class Gram_Term_AtomsR src (ss::[*]) (rs::[*]) g where
        gs_term_atomsR :: Proxy rs -> [CF g (AST_Term src ss)]
instance Gram_Term_AtomsR src ss '[] g where
        gs_term_atomsR _rs = []
instance
 ( Gram_Term_AtomsT src ss t  g
 , Gram_Term_AtomsR src ss rs g
 ) => Gram_Term_AtomsR src ss (t ': rs) g where
        gs_term_atomsR _rs =
                gs_term_atomsT (Proxy @t) <>
                gs_term_atomsR (Proxy @rs)
 
-- ** Class 'Gram_Term_AtomsT'
class Gram_Term_AtomsT src ss t g where
        gs_term_atomsT :: Proxy t -> [CF g (AST_Term src ss)]
        gs_term_atomsT _t = []
-- instance Gram_Term_AtomsT src ss t RuleDef
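
-- A typical instance contributes extra atom grammars for one element of @ss@.
-- A minimal sketch (@MyTy@ and @g_my_atom@ are hypothetical):
--
-- > instance Gram_Term_AtomsT src ss (Proxy MyTy) g where
-- >     gs_term_atomsT _t = [g_my_atom]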
 
gram_term ::
 forall g.
 ( Gram_Term '[Proxy (->), Proxy Integer] () g
 ) => [CF g ()]
gram_term =
 [ voiD g_term
 , voiD g_term_operators
 , void (g_term_abst_decl::CF g (TeName, AST_Type ()))
 , void g_term_mod_name
 , void $ cf_of_Terminal g_term_idname_tail
 , void $ cf_of_Reg g_term_keywords
 , void g_term_mod_opname
 , void $ cf_of_Terminal g_term_opname_ok
 , void $ cf_of_Reg g_term_keysyms
 ] where
        voiD :: CF g (AST_Term () '[Proxy (->), Proxy Integer]) -> CF g ()
        voiD = void