]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Program.hs
wip
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Program.hs
1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE TupleSections #-}
3 {-# LANGUAGE UndecidableInstances #-} -- For Cursorable (Cursor inp)
4 -- | Build the 'Instr'uction 'Program' of a 'Machine'
5 -- from the 'Comb'inators of a 'Grammar'.
6 -- 'Instr'uctions are kept introspectable
7 -- to enable more optimizations made possible now because
8 -- of a broader knowledge of the 'Instr'uctions around
9 -- those generated (see for instance 'joinNext').
10 module Symantic.Parser.Machine.Program where
11
12 import Control.DeepSeq (NFData)
13 import Control.Monad (Monad(..), (<=<), (=<<), liftM, liftM2, sequence)
14 import Data.Bool (Bool(..))
15 import Data.Eq (Eq)
16 import Data.Function (($), (.))
17 import Data.Ord (Ord)
18 import Data.Semigroup (Semigroup(..))
19 import System.IO (IO)
20 import Text.Show (Show(..))
21 import Type.Reflection (Typeable)
22 import qualified Data.Functor as Functor
23 import qualified Data.Set as Set
24 import qualified Data.Traversable as Traversable
25 import qualified Language.Haskell.TH as TH
26 import qualified Language.Haskell.TH.Syntax as TH
27 import qualified Symantic.Class as Prod
28
29 import Symantic.Derive
30 import Symantic.Parser.Grammar
31 import Symantic.Parser.Machine.Input
32 import Symantic.Parser.Machine.Instructions
33 import Symantic.Parser.Machine.Optimize
34
35 -- * Type 'Program'
36 -- | A 'Program' is a tree of 'Instr'uctions,
37 -- where each 'Instr'uction is built by a continuation
38 -- to be able to introspect, duplicate and/or change
39 -- the next 'Instr'uction.
40 newtype Program repr inp a = Program { unProgram ::
41 forall vs ret.
42 -- This is the next instruction.
43 SomeInstr repr inp (a ': vs) ret ->
44 -- This is the current instruction
45 -- IO is needed for 'TH.newName'.
46 IO (SomeInstr repr inp vs ret)
47 }
48
49 -- | Build an interpreter of the 'Program' of the given 'Machinable'.
50 optimizeMachine ::
51 forall inp repr a.
52 Machinable (InputToken inp) repr =>
53 Program repr inp a ->
54 IO (repr inp '[] a)
55 optimizeMachine (Program f) = derive Functor.<$> f @'[] ret
56
57 -- * Class 'Machinable'
58 -- | All the 'Instr'uctions.
59 type Machinable tok repr =
60 ( InstrBranchable repr
61 , InstrComment repr
62 , InstrExceptionable repr
63 , InstrInputable repr
64 , InstrJoinable repr
65 , InstrCallable repr
66 , InstrValuable repr
67 , InstrReadable tok repr
68 , InstrIterable repr
69 , InstrRegisterable repr
70 , Eq tok
71 , Ord tok
72 , TH.Lift tok
73 , NFData tok
74 , Show tok
75 , Typeable tok
76 )
77
78 instance
79 ( Positionable (InputPosition inp)
80 , InstrBranchable repr
81 , InstrComment repr
82 , InstrExceptionable repr
83 , InstrInputable repr
84 , InstrJoinable repr
85 , InstrValuable repr
86 ) => CombAlternable (Program repr inp) where
87 alt exn (Program l) (Program r) = joinNext $ Program $ \next ->
88 liftM2
89 (catch exn)
90 (l $ commit exn next)
91 (raiseAgainIfConsumed exn Functor.<$> r next)
92 throw exn = Program $ \_next -> return $ raise exn
93 failure flr = Program $ \_next -> return $ fail (Set.singleton flr)
94 empty = Program $ \_next -> return $ fail (Set.singleton (SomeFailure FailureEmpty))
95 try (Program x) = Program $ \next ->
96 liftM2
97 (catch ExceptionFailure)
98 (x $ commit ExceptionFailure next)
99 -- On 'ExceptionFailure', reset the input, and propagate the failure.
100 (return $ loadInput $ fail Set.empty)
101
102 -- | @(raiseAgainIfConsumed exn ok)@
103 -- compares the stacked input position with the current one,
104 -- in case they're the same then continue with @(ok)@,
105 -- otherwise, re-'raise' @(exn)@,
106 -- without updating the farthest error
107 -- (which is usually done when 'fail'ing).
108 raiseAgainIfConsumed ::
109 Positionable (InputPosition inp) =>
110 InstrBranchable repr =>
111 InstrComment repr =>
112 InstrExceptionable repr =>
113 InstrInputable repr =>
114 InstrValuable repr =>
115 Exception ->
116 SomeInstr repr inp vs ret ->
117 SomeInstr repr inp (InputPosition inp ': vs) ret
118 raiseAgainIfConsumed exn ok =
119 comment "raiseAgainIfConsumed" $
120 saveInput $
121 lift2Value (splice samePosition) $
122 ifBranch ok $
123 case exn of
124 ExceptionLabel lbl -> raise lbl
125 ExceptionFailure -> fail Set.empty
126
127 -- | @('joinNext' m)@ factorize the next 'Instr'uction
128 -- to be able to reuse it multiple times without duplication.
129 -- It does so by introducing a 'defJoin'
130 -- and passing the corresponding 'refJoin'
131 -- as next 'Instr'uction to @(m)@,
132 -- unless factorizing is useless because the next 'Instr'uction
133 -- is already a 'refJoin' or a 'ret'.
134 -- It should be used each time the next 'Instr'uction
135 -- is used multiple times.
136 joinNext ::
137 InstrJoinable repr =>
138 Program repr inp v ->
139 Program repr inp v
140 joinNext (Program m) = Program $ \case
141 -- Double refJoin Optimization:
142 -- If a join-node points directly to another join-node,
143 -- then reuse it
144 next@(Instr RefJoin{}) -> m next
145 -- If a join-node points directly to a 'jump',
146 -- then reuse it.
147 -- Because 'Jump' expects an empty 'valueStack',
148 -- a 'PopValue' has to be here to drop
149 -- the value normaly expected by the 'next' 'Instr'uction.
150 next@(Instr (PopValue (Instr Jump{}))) -> m next
151 -- Terminal refJoin Optimization:
152 -- If a join-node points directly to a terminal operation,
153 -- then it's useless to introduce a join-node.
154 next@(Instr Ret{}) -> m next
155 -- Introduce a join-node.
156 next -> do
157 !joinName <- TH.newName "join"
158 defJoin (LetName joinName) next
159 Functor.<$> m (refJoin (LetName joinName))
160
161 instance
162 InstrValuable repr =>
163 CombApplicable (Program repr inp) where
164 pure x = Program $ return . pushValue (prodCode x)
165 Program f <*> Program x = Program $ (f <=< x) . applyValue
166 liftA2 f (Program x) (Program y) = Program $ (x <=< y) . lift2Value (prodCode f)
167 Program x *> Program y = Program (x <=< return . popValue <=< y)
168 Program x <* Program y = Program (x <=< y <=< return . popValue)
169 instance
170 ( Positionable (InputPosition inp)
171 , InstrBranchable repr
172 , InstrCallable repr
173 , InstrComment repr
174 , InstrExceptionable repr
175 , InstrInputable repr
176 , InstrIterable repr
177 , InstrJoinable repr
178 , InstrRegisterable repr
179 , InstrValuable repr
180 ) => CombFoldable (Program repr inp) where
181 chainPre (Program op) (Program done) =
182 new (pure Prod.id) $ \(Register r) -> Program $ \next -> do
183 !loopName <- TH.newName "loop"
184 liftM2 (iter (LetName loopName))
185 (op $
186 mapValue (Prod.flip Prod..@ (Prod..)) $
187 modifyRegister r $
188 jump True (LetName loopName) )
189 (raiseAgainIfConsumed ExceptionFailure .
190 readRegister r Functor.<$>
191 (done (applyValue next)))
192 chainPost (Program done) (Program op) =
193 new (pure Prod.id) $ \(Register r) -> Program $ \next -> do
194 !loopName <- TH.newName "loop"
195 liftM2 (iter (LetName loopName))
196 (op $
197 modifyRegister (UnscopedRegister (unUnscopedRegister r)) $
198 jump True (LetName loopName) )
199 (raiseAgainIfConsumed ExceptionFailure .
200 readRegister r Functor.<$>
201 (done (applyValue next)))
202 instance
203 InstrCallable repr =>
204 Referenceable TH.Name (Program repr inp) where
205 -- TODO: isRec should be passed to 'call' and 'jump'
206 -- instead of redoing the work with 'CallTrace'.
207 ref isRec name = Program $ \case
208 -- Tail Call Optimization:
209 -- returning just after a 'call' is useless:
210 -- using 'jump' lets the 'ret' of the 'defLet'
211 -- directly return where it would in two 'ret's.
212 Instr Ret{} -> return $ jump isRec (LetName name)
213 next -> return $ call isRec (LetName name) next
214 {-
215 refable n (Program sub) = Program $ \next -> do
216 sub' <- sub ret
217 return $ defLet (HM.singleton n (SomeLet sub')) (call (LetName n) next)
218 -}
219 instance
220 InstrCallable repr =>
221 Letsable TH.Name (Program repr inp) where
222 lets defs (Program body) = Program $ \next -> do
223 -- Every definition becomes a 'call'able subroutine.
224 defs' <- Traversable.traverse
225 (\(SomeLet (Program val)) -> liftM SomeLet (val ret))
226 defs
227 liftM (defLet defs') (body next)
228 instance
229 ( Eq (InputToken inp)
230 , Positionable (InputPosition inp)
231 , InstrBranchable repr
232 , InstrExceptionable repr
233 , InstrInputable repr
234 , InstrJoinable repr
235 , InstrReadable (InputToken inp) repr
236 , Typeable (InputToken inp)
237 , InstrValuable repr
238 , InstrComment repr
239 ) => CombLookable (Program repr inp) where
240 look (Program x) = Program $ \next ->
241 liftM (comment "look") $
242 liftM saveInput $
243 x $
244 swapValue $
245 loadInput next
246 eof =
247 negLook (satisfy (Prod.const Prod..@ Prod.constant True))
248 -- This sets a better failure message
249 <|> (Program $ \_next ->
250 return $
251 comment "eof.fail" $
252 fail (Set.singleton (SomeFailure FailureEnd)))
253 negLook (Program x) = Program $ \next ->
254 liftM (comment "negLook") $
255 liftM2 (catch ExceptionFailure)
256 -- On x success, discard the result,
257 -- and replace this 'OnException' by a failure
258 -- whose 'farthestExpecting' is negated,
259 -- then a failure is raised from the input
260 -- when entering 'negLook', to avoid odd cases:
261 -- - where the failure that made (negLook x)
262 -- succeed can get the blame for the overall
263 -- failure of the grammar.
264 -- - where the overall failure of
265 -- the grammar might be blamed on something in x
266 -- that, if corrected, still makes x succeed
267 -- and (negLook x) fail.
268 (
269 liftM (comment "negLook.ahead") $
270 liftM saveInput $
271 x $
272 popValue $
273 commit ExceptionFailure $
274 loadInput $
275 fail Set.empty
276 )
277 -- On the failure of x: reset the input,
278 -- and go on with the next 'Instr'uctions.
279 (
280 liftM (comment "negLook.reset") $
281 return $
282 loadInput $
283 pushValue Prod.unit next
284 )
285 instance
286 ( InstrBranchable repr
287 , InstrJoinable repr
288 ) => CombMatchable (Program repr inp) where
289 conditional (Program a) bs (Program d) = joinNext $ Program $ \next -> do
290 bs' <- Control.Monad.sequence $ (\(p, b) -> (prodCode p,) Functor.<$> unProgram b next) Functor.<$> bs
291 a =<< liftM (choicesBranch bs') (d next)
292 instance
293 ( tok ~ InputToken inp
294 , InstrReadable tok repr
295 , InstrComment repr
296 , Typeable tok
297 ) => CombSatisfiable tok (Program repr inp) where
298 satisfyOrFail fs p = Program $ \next ->
299 return $
300 comment ("satisfy "<>showsPrec 11 (prodCode p) "") $
301 read fs (prodCode p) next
302 instance
303 ( InstrBranchable repr
304 , InstrJoinable repr
305 , InstrValuable repr
306 ) => CombSelectable (Program repr inp) where
307 branch (Program lr) (Program l) (Program r) = joinNext $ Program $ \next ->
308 lr =<< liftM2 caseBranch
309 (l (swapValue (applyValue next)))
310 (r (swapValue (applyValue next)))
311 instance
312 ( InstrValuable repr
313 , InstrRegisterable repr
314 ) => CombRegisterable (Program repr inp) where
315 new (Program p) k = Program $ \next -> do
316 !regName <- TH.newName "reg"
317 p =<< liftM (newRegister (UnscopedRegister regName))
318 (unProgram (k (Register (UnscopedRegister regName))) next)
319 get (Register r) = Program $ \next ->
320 return $ readRegister r next
321 put (Register r) (Program k) = Program $ \next ->
322 k $ writeRegister r $ pushValue Prod.unit next
323 instance
324 ( InstrValuable repr
325 , InstrRegisterable repr
326 ) => CombRegisterableUnscoped (Program repr inp) where
327 newUnscoped r (Program p) k = Program $ \next ->
328 p =<< liftM (newRegister r) (unProgram k next)
329 getUnscoped r = Program $ return . readRegister r
330 putUnscoped r (Program k) = Program $
331 k . writeRegister r . pushValue Prod.unit