{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE StandaloneDeriving #-} -- For Show (ParsingError inp) {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE UnboxedTuples #-} -- For nextInput {-# LANGUAGE UndecidableInstances #-} -- For Show (ParsingError inp) module Symantic.Parser.Machine.Generate where import Control.Monad (Monad(..)) import Data.Bool (Bool) import Data.Char (Char) import Data.Either (Either(..)) import Data.Function (($), (.)) import Data.Functor ((<$>)) import Data.Int (Int) import Data.List (minimum) import Data.Map (Map) import Data.Maybe (Maybe(..)) import Data.Ord (Ord(..), Ordering(..)) import Data.Semigroup (Semigroup(..)) import Data.Set (Set) import Language.Haskell.TH (CodeQ, Code(..)) import Prelude ((+), (-)) import Text.Show (Show(..)) import qualified Data.Map.Strict as Map import qualified Data.Set as Set import qualified Language.Haskell.TH.Syntax as TH -- import qualified Control.Monad.Trans.Writer as Writer import Symantic.Univariant.Trans import Symantic.Parser.Grammar.Combinators (ErrorItem(..)) import Symantic.Parser.Machine.Input import Symantic.Parser.Machine.Instructions import qualified Symantic.Parser.Haskell as H genCode :: TermInstr a -> CodeQ a genCode = trans -- * Type 'Gen' -- | Generate the 'CodeQ' parsing the input. data Gen inp vs es a = Gen { minHorizon :: Map TH.Name Horizon -> Horizon -- ^ Synthetized (bottom-up) minimal input length -- required by the parser to not fail. -- This requires a 'minHorizonByName' -- containing the minimal 'Horizon's of all the 'TH.Name's -- this parser 'call's, 'jump's or 'refJoin's to. , unGen :: GenCtx inp vs es a -> CodeQ (Either (ParsingError inp) a) } -- ** Type 'ParsingError' data ParsingError inp = ParsingErrorStandard { parsingErrorOffset :: Offset -- | Note that if an 'ErrorItemHorizon' greater than 1 -- is amongst the 'parsingErrorExpecting' -- then this is only the 'InputToken' -- at the begining of the expected 'Horizon'. , parsingErrorUnexpected :: Maybe (InputToken inp) , parsingErrorExpecting :: Set (ErrorItem (InputToken inp)) } deriving instance Show (InputToken inp) => Show (ParsingError inp) -- ** Type 'Offset' type Offset = Int -- ** Type 'Horizon' -- | Synthetized minimal input length -- required for a successful parsing. -- Used with 'checkedHorizon' to factorize input length checks, -- instead of checking the input length -- one 'InputToken' at a time at each 'read'. type Horizon = Offset -- ** Type 'Cont' type Cont inp v a = {-farthestInput-}Cursor inp -> {-farthestExpecting-}[ErrorItem (InputToken inp)] -> v -> Cursor inp -> Either (ParsingError inp) a -- ** Type 'FailHandler' type FailHandler inp a = {-failureInput-}Cursor inp -> {-farthestInput-}Cursor inp -> {-farthestExpecting-}[ErrorItem (InputToken inp)] -> Either (ParsingError inp) a {- -- *** Type 'FarthestError' data FarthestError inp = FarthestError { farthestInput :: Cursor inp , farthestExpecting :: [ErrorItem (InputToken inp)] } -} -- | @('generateCode' input mach)@ generates @TemplateHaskell@ code -- parsing the given 'input' according to the given 'Machine'. generateCode :: forall inp ret. Ord (InputToken inp) => Show (InputToken inp) => TH.Lift (InputToken inp) => -- InputToken inp ~ Char => Input inp => CodeQ inp -> Show (Cursor inp) => Gen inp '[] ('Succ 'Zero) ret -> CodeQ (Either (ParsingError inp) ret) generateCode input k = [|| -- Pattern bindings containing unlifted types -- should use an outermost bang pattern. let !(# init, readMore, readNext #) = $$(cursorOf input) in let finalRet = \_farInp _farExp v _inp -> Right v in let finalFail _failInp !farInp !farExp = Left ParsingErrorStandard { parsingErrorOffset = offset farInp , parsingErrorUnexpected = if readMore farInp then Just (let (# c, _ #) = readNext farInp in c) else Nothing , parsingErrorExpecting = Set.fromList farExp } in $$(unGen k GenCtx { valueStack = ValueStackEmpty , failStack = FailStackCons [||finalFail||] FailStackEmpty , retCode = [||finalRet||] , input = [||init||] , nextInput = [||readNext||] , moreInput = [||readMore||] -- , farthestError = [||Nothing||] , farthestInput = [||init||] , farthestExpecting = [|| [] ||] , checkedHorizon = 0 , minHorizonByName = Map.empty }) ||] -- ** Type 'GenCtx' -- | This is an inherited (top-down) context -- only present at compile-time, to build TemplateHaskell splices. data GenCtx inp vs (es::Peano) a = ( TH.Lift (InputToken inp) , Cursorable (Cursor inp) , Show (InputToken inp) -- , InputToken inp ~ Char ) => GenCtx { valueStack :: ValueStack vs , failStack :: FailStack inp a es --, failStacks :: FailStack inp es a , retCode :: CodeQ (Cont inp a a) , input :: CodeQ (Cursor inp) , moreInput :: CodeQ (Cursor inp -> Bool) , nextInput :: CodeQ (Cursor inp -> (# InputToken inp, Cursor inp #)) , farthestInput :: CodeQ (Cursor inp) , farthestExpecting :: CodeQ [ErrorItem (InputToken inp)] -- | Remaining horizon already checked. -- Updated by 'checkHorizon' -- and reset elsewhere when needed. , checkedHorizon :: Offset -- | Minimal horizon for each 'subroutine' or 'defJoin'. -- This can be done as an inherited attribute because -- 'OverserveSharing' introduces 'def' as an ancestor node -- of all the 'ref's pointing to it. -- Same for 'defJoin' and its 'refJoin's. , minHorizonByName :: Map TH.Name Offset } -- ** Type 'ValueStack' data ValueStack vs where ValueStackEmpty :: ValueStack '[] ValueStackCons :: { valueStackHead :: TermInstr v , valueStackTail :: ValueStack vs } -> ValueStack (v ': vs) -- ** Type 'FailStack' data FailStack inp a es where FailStackEmpty :: FailStack inp a 'Zero FailStackCons :: { failStackHead :: CodeQ (FailHandler inp a) , failStackTail :: FailStack inp a es } -> FailStack inp a ('Succ es) instance Stackable Gen where push x k = k { unGen = \ctx -> unGen k ctx { valueStack = ValueStackCons x (valueStack ctx) } } pop k = k { unGen = \ctx -> unGen k ctx { valueStack = valueStackTail (valueStack ctx) } } liftI2 f k = k { unGen = \ctx -> unGen k ctx { valueStack = let ValueStackCons y (ValueStackCons x xs) = valueStack ctx in ValueStackCons (f H.:@ x H.:@ y) xs } } swap k = k { unGen = \ctx -> unGen k ctx { valueStack = let ValueStackCons y (ValueStackCons x xs) = valueStack ctx in ValueStackCons x (ValueStackCons y xs) } } instance Branchable Gen where caseI kx ky = Gen { minHorizon = \ls -> minHorizon kx ls `min` minHorizon ky ls , unGen = \ctx -> let ValueStackCons v vs = valueStack ctx in [|| case $$(genCode v) of Left x -> $$(unGen kx ctx{ valueStack = ValueStackCons (H.Term [||x||]) vs }) Right y -> $$(unGen ky ctx{ valueStack = ValueStackCons (H.Term [||y||]) vs }) ||] } choices fs ks kd = Gen { minHorizon = \hs -> minimum $ minHorizon kd hs : (($ hs) . minHorizon <$> ks) , unGen = \ctx -> let ValueStackCons v vs = valueStack ctx in go ctx{valueStack = vs} v fs ks } where go ctx x (f:fs') (k:ks') = [|| if $$(genCode (f H.:@ x)) then $$(unGen k ctx) else $$(go ctx x fs' ks') ||] go ctx _ _ _ = unGen kd ctx instance Failable Gen where fail failExp = Gen { minHorizon = \_hs -> 0 , unGen = \ctx@GenCtx{} -> [|| let (# farInp, farExp #) = case $$compareOffset $$(farthestInput ctx) $$(input ctx) of LT -> (# $$(input ctx), failExp #) EQ -> (# $$(farthestInput ctx), ($$(farthestExpecting ctx) <> failExp) #) GT -> (# $$(farthestInput ctx), $$(farthestExpecting ctx) #) in $$(failStackHead (failStack ctx)) $$(input ctx) farInp farExp ||] } popFail k = k { unGen = \ctx -> unGen k ctx{failStack = failStackTail (failStack ctx)} } catchFail ok ko = Gen { minHorizon = \ls -> minHorizon ok ls `min` minHorizon ko ls , unGen = \ctx@GenCtx{} -> unGen ok ctx { failStack = FailStackCons [|| \ !failInp !farInp !farExp -> -- trace ("catchFail: " <> "farExp="<>show farExp) $ $$(unGen ko ctx -- Push the input as it was when entering the catchFail. { valueStack = ValueStackCons (H.Term (input ctx)) (valueStack ctx) -- Move the input to the failing position. , input = [||failInp||] -- Set the farthestInput to the farthest computed by 'fail' , farthestInput = [||farInp||] , farthestExpecting = [||farExp||] }) ||] (failStack ctx) } } instance Inputable Gen where loadInput k = k { unGen = \ctx -> let ValueStackCons input vs = valueStack ctx in unGen k ctx { valueStack = vs , input = genCode input , checkedHorizon = 0 } } pushInput k = k { unGen = \ctx -> unGen k ctx{valueStack = ValueStackCons (H.Term (input ctx)) (valueStack ctx)} } instance Routinable Gen where subroutine (LetName n) sub k = Gen { minHorizon = minHorizon k , unGen = \ctx -> Code $ TH.unsafeTExpCoerce $ do -- 'sub' is recursively 'call'able within 'sub', -- but its maximal 'minHorizon' is not known yet. let minHorizonByNameButSub = Map.insert n 0 (minHorizonByName ctx) body <- TH.unTypeQ $ TH.examineCode $ [|| -- buildRec in Parsley -- subroutine called by 'call' or 'jump' \ !ok{-from generateSuspend or retCode-} !inp !ko{-from failStackHead-} -> $$(unGen sub ctx { valueStack = ValueStackEmpty , failStack = FailStackCons [||ko||] FailStackEmpty , input = [||inp||] , retCode = [||ok||] -- These are passed by the caller via 'ok' or 'ko' -- , farthestInput = -- , farthestExpecting = -- Some callers can call this subroutine -- with zero checkedHorizon, hence use this minimum. -- TODO: maybe it could be improved a bit -- by taking the minimum of the checked horizons -- before all the 'call's and 'jump's to this subroutine. , checkedHorizon = 0 , minHorizonByName = minHorizonByNameButSub }) ||] let decl = TH.FunD n [TH.Clause [] (TH.NormalB body) []] expr <- TH.unTypeQ (TH.examineCode (unGen k ctx { minHorizonByName = -- 'sub' is 'call'able within 'k'. Map.insert n (minHorizon sub minHorizonByNameButSub) (minHorizonByName ctx) })) return (TH.LetE [decl] expr) } jump (LetName n) = Gen { minHorizon = (Map.! n) , unGen = \ctx -> [|| let _ = "jump" in $$(Code (TH.unsafeTExpCoerce (return (TH.VarE n)))) {-ok-}$$(retCode ctx) $$(input ctx) $$(failStackHead (failStack ctx)) ||] } call (LetName n) k = k { minHorizon = (Map.! n) , unGen = \ctx -> [|| let _ = "call" in $$(Code (TH.unsafeTExpCoerce (return (TH.VarE n)))) {-ok-}$$(generateSuspend k ctx) $$(input ctx) $$(failStackHead (failStack ctx)) ||] } ret = Gen { minHorizon = \_hs -> 0 , unGen = \ctx -> unGen (generateResume (retCode ctx)) ctx } -- | Generate a continuation to be called with 'generateResume', -- used when 'call' 'ret'urns. -- The return 'v'alue is 'push'ed on the 'valueStack'. generateSuspend :: {-k-}Gen inp (v ': vs) es a -> GenCtx inp vs es a -> CodeQ (Cont inp v a) generateSuspend k ctx = [|| let _ = "suspend" in \farInp farExp v !inp -> $$(unGen k ctx { valueStack = ValueStackCons (H.Term [||v||]) (valueStack ctx) , input = [||inp||] , farthestInput = [||farInp||] , farthestExpecting = [||farExp||] , checkedHorizon = 0 } ) ||] -- | Generate a call to the 'generateSuspend' continuation. -- Used when 'call' 'ret'urns. generateResume :: CodeQ (Cont inp v a) -> Gen inp (v ': vs) es a generateResume k = Gen { minHorizon = \_hs -> 0 , unGen = \ctx -> [|| let _ = "resume" in $$k $$(farthestInput ctx) $$(farthestExpecting ctx) (let _ = "resume.genCode" in $$(genCode (valueStackHead (valueStack ctx)))) $$(input ctx) ||] } instance Joinable Gen where defJoin (LetName n) joined k = k { minHorizon = minHorizon k , unGen = \ctx -> Code $ TH.unsafeTExpCoerce $ do body <- TH.unTypeQ $ TH.examineCode $ [|| \farInp farExp v !inp -> $$(unGen joined ctx { valueStack = ValueStackCons (H.Term [||v||]) (valueStack ctx) , input = [||inp||] , farthestInput = [||farInp||] , farthestExpecting = [||farExp||] , checkedHorizon = 0 }) ||] let decl = TH.FunD n [TH.Clause [] (TH.NormalB body) []] expr <- TH.unTypeQ (TH.examineCode (unGen k ctx { minHorizonByName = -- 'joined' is 'refJoin'able within 'k'. Map.insert n -- By definition (in 'joinNext') -- 'joined' is not recursively 'refJoin'able within 'joined', -- hence no need to prevent against recursivity -- as has to be done in 'subroutine'. (minHorizon joined (minHorizonByName ctx)) (minHorizonByName ctx) })) return (TH.LetE [decl] expr) } refJoin (LetName n) = (generateResume (Code (TH.unsafeTExpCoerce (return (TH.VarE n))))) { minHorizon = (Map.! n) } instance Readable Char Gen where read farExp p = checkHorizon . checkToken farExp p checkHorizon :: TH.Lift (InputToken inp) => {-ok-}Gen inp vs ('Succ es) a -> Gen inp vs ('Succ es) a checkHorizon ok = ok { minHorizon = \hs -> 1 + minHorizon ok hs , unGen = \ctx0@GenCtx{failStack = FailStackCons e es} -> [|| -- Factorize failure code let readFail = $$(e) in $$( let ctx = ctx0{ failStack = FailStackCons [||readFail||] es } in if checkedHorizon ctx >= 1 then unGen ok ctx0{checkedHorizon = checkedHorizon ctx - 1} else let minHoriz = minHorizon ok (minHorizonByName ctx) in [|| if $$(moreInput ctx) $$(if minHoriz > 0 then [||$$shiftRight minHoriz $$(input ctx)||] else input ctx) then $$(unGen ok ctx{checkedHorizon = minHoriz}) else let _ = "checkHorizon.else" in $$(unGen (fail [ErrorItemHorizon (minHoriz + 1)]) ctx) ||] ) ||] } checkToken :: forall inp vs es a. Ord (InputToken inp) => TH.Lift (InputToken inp) => [ErrorItem (InputToken inp)] -> {-predicate-}TermInstr (InputToken inp -> Bool) -> {-ok-}Gen inp (InputToken inp ': vs) ('Succ es) a -> Gen inp vs ('Succ es) a checkToken farExp p ok = ok { unGen = \ctx -> [|| let !(# c, cs #) = $$(nextInput ctx) $$(input ctx) in if $$(genCode p) c then $$(unGen ok ctx { valueStack = ValueStackCons (H.Term [||c||]) (valueStack ctx) , input = [||cs||] }) else let _ = "checkToken.else" in $$(unGen (fail farExp) ctx) ||] }