{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE UndecidableInstances #-} -- For Cursorable (Cursor inp) -- | Build the 'Instr'uction 'Program' of a 'Machine' -- from the 'Comb'inators of a 'Grammar'. -- 'Instr'uctions are kept introspectable -- to enable more optimizations made possible now because -- of a broader knowledge of the 'Instr'uctions around -- those generated (see for instance 'joinNext'). module Symantic.Parser.Machine.Program where import Control.Monad (Monad(..), (<=<), (=<<), liftM, liftM2, sequence) import Data.Function (($)) import System.IO (IO) import Type.Reflection (Typeable) import Control.DeepSeq (NFData) import Data.Bool (Bool(..)) import Data.Eq (Eq) import Data.Function ((.)) import Data.Ord (Ord) import Text.Show (Show(..)) import qualified Data.Functor as Functor import qualified Data.Set as Set import qualified Data.Traversable as Traversable import qualified Language.Haskell.TH as TH import qualified Language.Haskell.TH.Syntax as TH import qualified Symantic.Lang as Prod import Symantic.Derive import Symantic.Parser.Grammar import Symantic.Parser.Machine.Input import Symantic.Parser.Machine.Instructions import Symantic.Parser.Machine.Optimize -- * Type 'Program' -- | A 'Program' is a tree of 'Instr'uctions, -- where each 'Instr'uction is built by a continuation -- to be able to introspect, duplicate and/or change -- the next 'Instr'uction. data Program repr inp a = Program { unProgram :: forall vs ret. -- This is the next instruction. SomeInstr repr inp (a ': vs) ret -> -- This is the current instruction -- IO is needed for 'TH.newName'. IO (SomeInstr repr inp vs ret) } -- | Build an interpreter of the 'Program' of the given 'Machinable'. optimizeMachine :: forall inp repr a. Machinable (InputToken inp) repr => Program repr inp a -> IO (repr inp '[] a) optimizeMachine (Program f) = derive Functor.<$> f @'[] ret -- * Class 'Machinable' -- | All the 'Instr'uctions. type Machinable tok repr = ( InstrBranchable repr , InstrComment repr , InstrExceptionable repr , InstrInputable repr , InstrJoinable repr , InstrCallable repr , InstrValuable repr , InstrReadable tok repr , InstrIterable repr , InstrRegisterable repr , Eq tok , Ord tok , TH.Lift tok , NFData tok , Show tok , Typeable tok ) instance ( Cursorable (Cursor inp) , InstrBranchable repr , InstrExceptionable repr , InstrInputable repr , InstrJoinable repr , InstrValuable repr ) => CombAlternable (Program repr inp) where alt exn (Program l) (Program r) = joinNext $ Program $ \next -> liftM2 (catch exn) (l (commit exn next)) (raiseAgainIfConsumed exn Functor.<$> r next) throw exn = Program $ \_next -> return $ raise exn failure flr = Program $ \_next -> return $ fail (Set.singleton flr) empty = Program $ \_next -> return $ fail (Set.singleton (SomeFailure FailureEmpty)) try (Program x) = Program $ \next -> liftM2 (catch ExceptionFailure) (x (commit ExceptionFailure next)) -- On exception, reset the input, and propagate the failure. (return $ loadInput $ fail Set.empty) -- | @(raiseAgainIfConsumed exn ok)@ -- compares the stacked input position with the current one, -- in case they're the same then continue with @(ok)@, -- otherwise, re-'raise' @(exn)@, -- without updating the farthest error -- (which is usually done when 'fail'ing). raiseAgainIfConsumed :: Cursorable (Cursor inp) => InstrBranchable repr => InstrExceptionable repr => InstrInputable repr => InstrValuable repr => Exception -> SomeInstr repr inp vs ret -> SomeInstr repr inp (Cursor inp ': vs) ret raiseAgainIfConsumed exn ok = pushInput $ lift2Value (splice sameOffset) $ ifBranch ok $ case exn of ExceptionLabel lbl -> raise lbl ExceptionFailure -> fail Set.empty -- | @('joinNext' m)@ factorize the next 'Instr'uction -- to be able to reuse it multiple times without duplication. -- It does so by introducing a 'defJoin' -- and passing the corresponding 'refJoin' -- as next 'Instr'uction to @(m)@, -- unless factorizing is useless because the next 'Instr'uction -- is already a 'refJoin' or a 'ret'. -- It should be used each time the next 'Instr'uction -- is used multiple times. joinNext :: InstrJoinable repr => Program repr inp v -> Program repr inp v joinNext (Program m) = Program $ \case -- Double refJoin Optimization: -- If a join-node points directly to another join-node, -- then reuse it next@(Instr RefJoin{}) -> m next -- If a join-node points directly to a 'jump', -- then reuse it. -- Because 'Jump' expects an empty 'valueStack', -- a 'PopValue' has to be here to drop -- the value normaly expected by the 'next' 'Instr'uction. next@(Instr (PopValue (Instr Jump{}))) -> m next -- Terminal refJoin Optimization: -- If a join-node points directly to a terminal operation, -- then it's useless to introduce a join-node. next@(Instr Ret{}) -> m next -- Introduce a join-node. next -> do !joinName <- TH.newName "join" defJoin (LetName joinName) next Functor.<$> m (refJoin (LetName joinName)) instance InstrValuable repr => CombApplicable (Program repr inp) where pure x = Program $ return . pushValue (prodCode x) Program f <*> Program x = Program $ (f <=< x) . applyValue liftA2 f (Program x) (Program y) = Program $ (x <=< y) . lift2Value (prodCode f) Program x *> Program y = Program (x <=< return . popValue <=< y) Program x <* Program y = Program (x <=< y <=< return . popValue) instance ( Cursorable (Cursor inp) , InstrBranchable repr , InstrCallable repr , InstrExceptionable repr , InstrInputable repr , InstrIterable repr , InstrJoinable repr , InstrRegisterable repr , InstrValuable repr ) => CombFoldable (Program repr inp) where chainPre (Program op) (Program done) = new (pure Prod.id) $ \(Register r) -> Program $ \next -> do !loopName <- TH.newName "loop" liftM2 (iter (LetName loopName)) (op $ mapValue (Prod.flip Prod..@ (Prod..)) $ modifyRegister r $ jump True (LetName loopName) ) (raiseAgainIfConsumed ExceptionFailure . readRegister r Functor.<$> (done (applyValue next))) chainPost (Program done) (Program op) = new (pure Prod.id) $ \(Register r) -> Program $ \next -> do !loopName <- TH.newName "loop" liftM2 (iter (LetName loopName)) (op $ modifyRegister (UnscopedRegister (unUnscopedRegister r)) $ jump True (LetName loopName) ) (raiseAgainIfConsumed ExceptionFailure . readRegister r Functor.<$> (done (applyValue next))) instance InstrCallable repr => Referenceable TH.Name (Program repr inp) where -- TODO: isRec should be passed to 'call' and 'jump' -- instead of redoing the work with 'CallTrace'. ref isRec name = Program $ \case -- Tail Call Optimization: -- returning just after a 'call' is useless: -- using 'jump' lets the 'ret' of the 'defLet' -- directly return where it would in two 'ret's. Instr Ret{} -> return $ jump isRec (LetName name) next -> return $ call isRec (LetName name) next {- refable n (Program sub) = Program $ \next -> do sub' <- sub ret return $ defLet (HM.singleton n (SomeLet sub')) (call (LetName n) next) -} instance InstrCallable repr => Letsable TH.Name (Program repr inp) where lets defs (Program body) = Program $ \next -> do -- Every definition becomes a 'call'able subroutine. defs' <- Traversable.traverse (\(SomeLet (Program val)) -> liftM SomeLet (val ret)) defs liftM (defLet defs') (body next) instance ( Eq (InputToken inp) , Cursorable (Cursor inp) , InstrBranchable repr , InstrExceptionable repr , InstrInputable repr , InstrJoinable repr , InstrReadable (InputToken inp) repr , Typeable (InputToken inp) , InstrValuable repr , InstrComment repr ) => CombLookable (Program repr inp) where look (Program x) = Program $ \next -> liftM (comment "look") $ liftM pushInput (x (swapValue (loadInput next))) eof = negLook (satisfy (Prod.const Prod..@ Prod.bool True)) -- This sets a better failure message <|> (Program $ \_next -> return $ comment "eof.fail" $ fail (Set.singleton (SomeFailure FailureEnd))) negLook (Program x) = Program $ \next -> liftM (comment "negLook") $ liftM2 (catch ExceptionFailure) -- On x success, discard the result, -- and replace this 'Catcher' by a failure whose 'farthestExpecting' is negated, -- then a failure is raised from the input -- when entering 'negLook', to avoid odd cases: -- - where the failure that made (negLook x) -- succeed can get the blame for the overall -- failure of the grammar. -- - where the overall failure of -- the grammar might be blamed on something in x -- that, if corrected, still makes x succeed -- and (negLook x) fail. ( liftM (comment "negLook.ahead") $ liftM pushInput $ x $ popValue $ commit ExceptionFailure $ loadInput $ fail Set.empty ) -- On x failure, reset the input, -- and go on with the next 'Instr'uctions. ( liftM (comment "negLook.reset") $ return $ loadInput $ pushValue Prod.unit next ) instance ( InstrBranchable repr , InstrJoinable repr ) => CombMatchable (Program repr inp) where conditional (Program a) bs (Program d) = joinNext $ Program $ \next -> do bs' <- Control.Monad.sequence $ (\(p, b) -> (prodCode p,) Functor.<$> unProgram b next) Functor.<$> bs a =<< liftM (choicesBranch bs') (d next) instance ( tok ~ InputToken inp , InstrReadable tok repr , InstrComment repr , Typeable tok ) => CombSatisfiable tok (Program repr inp) where satisfyOrFail fs p = Program $ \next -> return $ comment "satisfy" $ read fs (prodCode p) next instance ( InstrBranchable repr , InstrJoinable repr , InstrValuable repr ) => CombSelectable (Program repr inp) where branch (Program lr) (Program l) (Program r) = joinNext $ Program $ \next -> lr =<< liftM2 caseBranch (l (swapValue (applyValue next))) (r (swapValue (applyValue next))) instance ( InstrValuable repr , InstrRegisterable repr ) => CombRegisterable (Program repr inp) where new (Program p) k = Program $ \next -> do !regName <- TH.newName "reg" p =<< liftM (newRegister (UnscopedRegister regName)) (unProgram (k (Register (UnscopedRegister regName))) next) get (Register r) = Program $ \next -> return $ readRegister r next put (Register r) (Program k) = Program $ \next -> k $ writeRegister r $ pushValue Prod.unit next instance ( InstrValuable repr , InstrRegisterable repr ) => CombRegisterableUnscoped (Program repr inp) where newUnscoped r (Program p) k = Program $ \next -> p =<< liftM (newRegister r) (unProgram k next) getUnscoped r = Program $ return . readRegister r putUnscoped r (Program k) = Program $ k . writeRegister r . pushValue Prod.unit