1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE UndecidableInstances #-} -- For Cursorable (Cursor inp)
3 -- | Build the 'Instr'uction 'Program' of a 'Machine'
4 -- from the 'Comb'inators of a 'Grammar'.
5 -- 'Instr'uctions are kept introspectable
6 -- to enable more optimizations made possible now because
7 -- of a broader knowledge of the 'Instr'uctions around
8 -- those generated (see for instance 'joinNext').
9 module Symantic.Parser.Machine.Program where
11 import Control.Monad (Monad(..), (<=<), (=<<), liftM, liftM2, sequence)
12 import Data.Function (($))
14 import Type.Reflection (Typeable)
15 import Control.DeepSeq (NFData)
16 import Data.Bool (Bool(..))
18 import Data.Function ((.))
20 import Text.Show (Show(..))
21 import qualified Data.Functor as Functor
22 import qualified Data.HashMap.Strict as HM
23 import qualified Data.Set as Set
24 import qualified Data.Traversable as Traversable
25 import qualified Language.Haskell.TH as TH
26 import qualified Language.Haskell.TH.Syntax as TH
27 import qualified Symantic.Typed.Lang as Prod
29 import Symantic.Typed.Derive
30 import Symantic.Parser.Grammar
31 import Symantic.Parser.Machine.Input
32 import Symantic.Parser.Machine.Instructions
33 import Symantic.Parser.Machine.Optimize
36 -- | A 'Program' is a tree of 'Instr'uctions,
37 -- where each 'Instr'uction is built by a continuation
38 -- to be able to introspect, duplicate and/or change
39 -- the next 'Instr'uction.
40 data Program repr inp a = Program { unProgram ::
42 -- This is the next instruction
43 SomeInstr repr inp (a ': vs) ret ->
44 -- This is the current instruction
45 -- IO is needed for 'TH.newName'.
46 IO (SomeInstr repr inp vs ret)
49 -- | Build an interpreter of the 'Program' of the given 'Machinable'.
52 Machinable (InputToken inp) repr =>
55 optimizeMachine (Program f) = derive Functor.<$> f @'[] ret
57 -- * Class 'Machinable'
58 -- | All the 'Instr'uctions.
59 type Machinable tok repr =
60 ( InstrBranchable repr
61 , InstrExceptionable repr
66 , InstrReadable tok repr
76 ( Cursorable (Cursor inp)
77 , InstrBranchable repr
78 , InstrExceptionable repr
82 , InstrReadable (InputToken inp) repr
83 , Typeable (InputToken inp)
85 Derivable (Comb CombAlternable (Program repr inp)) where
88 (Comb (SatisfyOrFail _fs p :: Comb (CombSatisfiable (InputToken inp)) (Program repr inp) a))
89 (Comb (Failure sf)) ->
90 satisfyOrFail (Set.singleton sf) p
91 Alt exn x y -> alt exn (derive x) (derive y)
93 Failure sf -> failure sf
94 Throw exn -> throw exn
95 Try x -> try (derive x)
98 ( Cursorable (Cursor inp)
99 , InstrBranchable repr
100 , InstrExceptionable repr
101 , InstrInputable repr
104 ) => CombAlternable (Program repr inp) where
105 alt exn (Program l) (Program r) = joinNext $ Program $ \next ->
107 (l (commit exn next))
108 (failIfConsumed exn Functor.<$> r next)
109 throw exn = Program $ \_next -> return $ raise exn
110 failure flr = Program $ \_next -> return $ fail (Set.singleton flr)
111 empty = Program $ \_next -> return $ fail (Set.singleton (SomeFailure FailureEmpty))
112 try (Program x) = Program $ \next ->
113 liftM2 (catch ExceptionFailure)
114 (x (commit ExceptionFailure next))
115 -- On exception, reset the input, and propagate the failure.
116 (return $ loadInput $ fail Set.empty)
118 -- | If no input has been consumed by the failing alternative
119 -- then continue with the given continuation.
120 -- Otherwise, propagate the failure.
122 Cursorable (Cursor inp) =>
123 InstrBranchable repr =>
124 InstrExceptionable repr =>
125 InstrInputable repr =>
126 InstrValuable repr =>
128 SomeInstr repr inp vs ret ->
129 SomeInstr repr inp (Cursor inp ': vs) ret
130 failIfConsumed exn k =
132 lift2Value (splice sameOffset) $
135 ExceptionLabel lbl -> raise lbl
136 ExceptionFailure -> fail Set.empty
138 -- | @('joinNext' m)@ factorize the next 'Instr'uction
139 -- to be able to reuse it multiple times without duplication.
140 -- It does so by introducing a 'defJoin'
141 -- and passing the corresponding 'refJoin'
142 -- as next 'Instr'uction to @(m)@,
143 -- unless factorizing is useless because the next 'Instr'uction
144 -- is already a 'refJoin' or a 'ret'.
145 -- It should be used each time the next 'Instr'uction
146 -- is used multiple times.
148 InstrJoinable repr =>
149 Program repr inp v ->
151 joinNext (Program m) = Program $ \case
152 -- Double refJoin Optimization:
153 -- If a join-node points directly to another join-node,
155 next@(Instr RefJoin{}) -> m next
156 -- If a join-node points directly to a 'jump',
158 -- Because 'Jump' expects an empty 'valueStack',
159 -- a 'PopValue' has to be here to drop
160 -- the value normaly expected by the 'next' 'Instr'uction.
161 next@(Instr (PopValue (Instr Jump{}))) -> m next
162 -- Terminal refJoin Optimization:
163 -- If a join-node points directly to a terminal operation,
164 -- then it's useless to introduce a join-node.
165 next@(Instr Ret{}) -> m next
166 -- Introduce a join-node.
168 !joinName <- TH.newName "join"
169 defJoin (LetName joinName) next
170 Functor.<$> m (refJoin (LetName joinName))
173 InstrValuable repr =>
174 CombApplicable (Program repr inp) where
175 pure x = Program $ return . pushValue (prodCode x)
176 Program f <*> Program x = Program $ (f <=< x) . applyValue
177 liftA2 f (Program x) (Program y) = Program $ (x <=< y) . lift2Value (prodCode f)
178 Program x *> Program y = Program (x <=< return . popValue <=< y)
179 Program x <* Program y = Program (x <=< y <=< return . popValue)
181 ( Cursorable (Cursor inp)
182 , InstrBranchable repr
183 , InstrExceptionable repr
184 , InstrInputable repr
187 ) => CombFoldable (Program repr inp) where
189 chainPre op p = go <*> p
190 where go = (Prod..) <$> op <*> go <|> pure Prod.id
191 chainPost p op = p <**> go
192 where go = (Prod..) <$> op <*> go <|> pure Prod.id
195 InstrCallable repr =>
196 Letable TH.Name (Program repr inp) where
197 shareable n (Program sub) = Program $ \next -> do
199 return $ defLet (HM.singleton n (SomeLet sub')) (call (LetName n) next)
200 ref _isRec n = Program $ \case
201 -- Tail Call Optimization:
202 -- returning just after a 'call' is useless:
203 -- using 'jump' lets the 'ret' of the 'defLet'
204 -- directly return where it would in two 'ret's.
205 Instr Ret{} -> return $ jump (LetName n)
206 next -> return $ call (LetName n) next
208 InstrCallable repr =>
209 Letsable TH.Name (Program repr inp) where
210 lets defs (Program x) = Program $ \next -> do
211 defs' <- Traversable.traverse (\(SomeLet (Program val)) -> liftM SomeLet (val ret)) defs
212 liftM (defLet defs') (x next)
214 ( Eq (InputToken inp)
215 , Cursorable (Cursor inp)
216 , InstrBranchable repr
217 , InstrExceptionable repr
218 , InstrInputable repr
220 , InstrReadable (InputToken inp) repr
221 , Typeable (InputToken inp)
223 ) => CombLookable (Program repr inp) where
224 look (Program x) = Program $ \next ->
225 liftM pushInput (x (swapValue (loadInput next)))
226 eof = negLook (satisfy (Prod.const Prod..@ Prod.bool True))
227 -- This sets a better failure message
228 <|> (Program $ \_next -> return $ fail (Set.singleton (SomeFailure FailureEnd)))
229 negLook (Program x) = Program $ \next ->
230 liftM2 (catch ExceptionFailure)
231 -- On x success, discard the result,
232 -- and replace this 'Catcher' by a failure whose 'farthestExpecting' is negated,
233 -- then a failure is raised from the input
234 -- when entering 'negLook', to avoid odd cases:
235 -- - where the failure that made (negLook x)
236 -- succeed can get the blame for the overall
237 -- failure of the grammar.
238 -- - where the overall failure of
239 -- the grammar might be blamed on something in x
240 -- that, if corrected, still makes x succeed
241 -- and (negLook x) fail.
242 (liftM pushInput $ x $
243 popValue $ commit ExceptionFailure $
244 loadInput $ fail Set.empty)
245 -- On x failure, reset the input,
246 -- and go on with the next 'Instr'uctions.
247 (return $ loadInput $ pushValue Prod.unit next)
249 ( InstrBranchable repr
251 ) => CombMatchable (Program repr inp) where
252 conditional (Program a) ps bs (Program d) = joinNext $ Program $ \next -> do
253 bs' <- Control.Monad.sequence $ (\(Program b) -> b next) Functor.<$> bs
254 a =<< liftM (choicesBranch (prodCode Functor.<$> ps) bs') (d next)
256 ( tok ~ InputToken inp
257 , InstrReadable tok repr
259 ) => CombSatisfiable tok (Program repr inp) where
260 -- Note: 'read' is left with the responsability
261 -- to apply 'normalOrderReduction' if need be.
262 satisfyOrFail fs p = Program $ return . read fs (prodCode p)
264 ( InstrBranchable repr
267 ) => CombSelectable (Program repr inp) where
268 branch (Program lr) (Program l) (Program r) = joinNext $ Program $ \next ->
269 lr =<< liftM2 caseBranch
270 (l (swapValue (applyValue next)))
271 (r (swapValue (applyValue next)))