build: move all Nix config to `flake.nix`
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Program.hs
1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE TupleSections #-}
3 {-# LANGUAGE UndecidableInstances #-} -- For Cursorable (Cursor inp)
4 -- | Build the 'Instr'uction 'Program' of a 'Machine'
5 -- from the 'Comb'inators of a 'Grammar'.
6 -- 'Instr'uctions are kept introspectable
7 -- to enable more optimizations made possible now because
8 -- of a broader knowledge of the 'Instr'uctions around
9 -- those generated (see for instance 'joinNext').
10 module Symantic.Parser.Machine.Program where
11
12 import Control.DeepSeq (NFData)
13 import Control.Monad (Monad(..), (<=<), (=<<), liftM, liftM2, sequence)
14 import Data.Bool (Bool(..))
15 import Data.Eq (Eq)
16 import Data.Function (($))
17 import Data.Function ((.))
18 import Data.Ord (Ord)
19 import Data.Semigroup (Semigroup(..))
20 import System.IO (IO)
21 import Text.Show (Show(..))
22 import Type.Reflection (Typeable)
23 import qualified Data.Functor as Functor
24 import qualified Data.Set as Set
25 import qualified Data.Traversable as Traversable
26 import qualified Language.Haskell.TH as TH
27 import qualified Language.Haskell.TH.Syntax as TH
28 import qualified Symantic.Class as Prod
29
30 import Symantic.Derive
31 import Symantic.Parser.Grammar
32 import Symantic.Parser.Machine.Input
33 import Symantic.Parser.Machine.Instructions
34 import Symantic.Parser.Machine.Optimize
35
36 -- * Type 'Program'
37 -- | A 'Program' is a tree of 'Instr'uctions,
38 -- where each 'Instr'uction is built by a continuation
39 -- to be able to introspect, duplicate and/or change
40 -- the next 'Instr'uction.
41 data Program repr inp a = Program { unProgram ::
42 forall vs ret.
43 -- This is the next instruction.
44 SomeInstr repr inp (a ': vs) ret ->
45 -- This is the current instruction
46 -- IO is needed for 'TH.newName'.
47 IO (SomeInstr repr inp vs ret)
48 }
49
50 -- | Build an interpreter of the 'Program' of the given 'Machinable'.
51 optimizeMachine ::
52 forall inp repr a.
53 Machinable (InputToken inp) repr =>
54 Program repr inp a ->
55 IO (repr inp '[] a)
56 optimizeMachine (Program f) = derive Functor.<$> f @'[] ret
57
58 -- * Class 'Machinable'
59 -- | All the 'Instr'uctions.
60 type Machinable tok repr =
61 ( InstrBranchable repr
62 , InstrComment repr
63 , InstrExceptionable repr
64 , InstrInputable repr
65 , InstrJoinable repr
66 , InstrCallable repr
67 , InstrValuable repr
68 , InstrReadable tok repr
69 , InstrIterable repr
70 , InstrRegisterable repr
71 , Eq tok
72 , Ord tok
73 , TH.Lift tok
74 , NFData tok
75 , Show tok
76 , Typeable tok
77 )
78
79 instance
80 ( Cursorable (Cursor inp)
81 , InstrBranchable repr
82 , InstrComment repr
83 , InstrExceptionable repr
84 , InstrInputable repr
85 , InstrJoinable repr
86 , InstrValuable repr
87 ) => CombAlternable (Program repr inp) where
88 alt exn (Program l) (Program r) = joinNext $ Program $ \next ->
89 liftM2 (catch exn)
90 (l (commit exn next))
91 (raiseAgainIfConsumed exn Functor.<$> r next)
92 throw exn = Program $ \_next -> return $ raise exn
93 failure flr = Program $ \_next -> return $ fail (Set.singleton flr)
94 empty = Program $ \_next -> return $ fail (Set.singleton (SomeFailure FailureEmpty))
95 try (Program x) = Program $ \next ->
96 liftM2 (catch ExceptionFailure)
97 (x (commit ExceptionFailure next))
98 -- On 'ExceptionFailure', reset the input, and propagate the failure.
99 (return $ loadInput $ fail Set.empty)
100
101 -- | @(raiseAgainIfConsumed exn ok)@
102 -- compares the stacked input position with the current one,
103 -- in case they're the same then continue with @(ok)@,
104 -- otherwise, re-'raise' @(exn)@,
105 -- without updating the farthest error
106 -- (which is usually done when 'fail'ing).
107 raiseAgainIfConsumed ::
108 Cursorable (Cursor inp) =>
109 InstrBranchable repr =>
110 InstrComment repr =>
111 InstrExceptionable repr =>
112 InstrInputable repr =>
113 InstrValuable repr =>
114 Exception ->
115 SomeInstr repr inp vs ret ->
116 SomeInstr repr inp (Cursor inp ': vs) ret
117 raiseAgainIfConsumed exn ok =
118 comment "raiseAgainIfConsumed" $
119 saveInput $
120 lift2Value (splice sameOffset) $
121 ifBranch ok $
122 case exn of
123 ExceptionLabel lbl -> raise lbl
124 ExceptionFailure -> fail Set.empty
125
126 -- | @('joinNext' m)@ factorize the next 'Instr'uction
127 -- to be able to reuse it multiple times without duplication.
128 -- It does so by introducing a 'defJoin'
129 -- and passing the corresponding 'refJoin'
130 -- as next 'Instr'uction to @(m)@,
131 -- unless factorizing is useless because the next 'Instr'uction
132 -- is already a 'refJoin' or a 'ret'.
133 -- It should be used each time the next 'Instr'uction
134 -- is used multiple times.
135 joinNext ::
136 InstrJoinable repr =>
137 Program repr inp v ->
138 Program repr inp v
139 joinNext (Program m) = Program $ \case
140 -- Double refJoin Optimization:
141 -- If a join-node points directly to another join-node,
142 -- then reuse it
143 next@(Instr RefJoin{}) -> m next
144 -- If a join-node points directly to a 'jump',
145 -- then reuse it.
146 -- Because 'Jump' expects an empty 'valueStack',
147 -- a 'PopValue' has to be here to drop
148 -- the value normaly expected by the 'next' 'Instr'uction.
149 next@(Instr (PopValue (Instr Jump{}))) -> m next
150 -- Terminal refJoin Optimization:
151 -- If a join-node points directly to a terminal operation,
152 -- then it's useless to introduce a join-node.
153 next@(Instr Ret{}) -> m next
154 -- Introduce a join-node.
155 next -> do
156 !joinName <- TH.newName "join"
157 defJoin (LetName joinName) next
158 Functor.<$> m (refJoin (LetName joinName))
159
160 instance
161 InstrValuable repr =>
162 CombApplicable (Program repr inp) where
163 pure x = Program $ return . pushValue (prodCode x)
164 Program f <*> Program x = Program $ (f <=< x) . applyValue
165 liftA2 f (Program x) (Program y) = Program $ (x <=< y) . lift2Value (prodCode f)
166 Program x *> Program y = Program (x <=< return . popValue <=< y)
167 Program x <* Program y = Program (x <=< y <=< return . popValue)
168 instance
169 ( Cursorable (Cursor inp)
170 , InstrBranchable repr
171 , InstrCallable repr
172 , InstrComment repr
173 , InstrExceptionable repr
174 , InstrInputable repr
175 , InstrIterable repr
176 , InstrJoinable repr
177 , InstrRegisterable repr
178 , InstrValuable repr
179 ) => CombFoldable (Program repr inp) where
180 chainPre (Program op) (Program done) =
181 new (pure Prod.id) $ \(Register r) -> Program $ \next -> do
182 !loopName <- TH.newName "loop"
183 liftM2 (iter (LetName loopName))
184 (op $
185 mapValue (Prod.flip Prod..@ (Prod..)) $
186 modifyRegister r $
187 jump True (LetName loopName) )
188 (raiseAgainIfConsumed ExceptionFailure .
189 readRegister r Functor.<$>
190 (done (applyValue next)))
191 chainPost (Program done) (Program op) =
192 new (pure Prod.id) $ \(Register r) -> Program $ \next -> do
193 !loopName <- TH.newName "loop"
194 liftM2 (iter (LetName loopName))
195 (op $
196 modifyRegister (UnscopedRegister (unUnscopedRegister r)) $
197 jump True (LetName loopName) )
198 (raiseAgainIfConsumed ExceptionFailure .
199 readRegister r Functor.<$>
200 (done (applyValue next)))
201 instance
202 InstrCallable repr =>
203 Referenceable TH.Name (Program repr inp) where
204 -- TODO: isRec should be passed to 'call' and 'jump'
205 -- instead of redoing the work with 'CallTrace'.
206 ref isRec name = Program $ \case
207 -- Tail Call Optimization:
208 -- returning just after a 'call' is useless:
209 -- using 'jump' lets the 'ret' of the 'defLet'
210 -- directly return where it would in two 'ret's.
211 Instr Ret{} -> return $ jump isRec (LetName name)
212 next -> return $ call isRec (LetName name) next
213 {-
214 refable n (Program sub) = Program $ \next -> do
215 sub' <- sub ret
216 return $ defLet (HM.singleton n (SomeLet sub')) (call (LetName n) next)
217 -}
218 instance
219 InstrCallable repr =>
220 Letsable TH.Name (Program repr inp) where
221 lets defs (Program body) = Program $ \next -> do
222 -- Every definition becomes a 'call'able subroutine.
223 defs' <- Traversable.traverse
224 (\(SomeLet (Program val)) -> liftM SomeLet (val ret))
225 defs
226 liftM (defLet defs') (body next)
227 instance
228 ( Eq (InputToken inp)
229 , Cursorable (Cursor inp)
230 , InstrBranchable repr
231 , InstrExceptionable repr
232 , InstrInputable repr
233 , InstrJoinable repr
234 , InstrReadable (InputToken inp) repr
235 , Typeable (InputToken inp)
236 , InstrValuable repr
237 , InstrComment repr
238 ) => CombLookable (Program repr inp) where
239 look (Program x) = Program $ \next ->
240 liftM (comment "look") $
241 liftM saveInput (x (swapValue (loadInput next)))
242 eof =
243 negLook (satisfy (Prod.const Prod..@ Prod.constant True))
244 -- This sets a better failure message
245 <|> (Program $ \_next ->
246 return $ comment "eof.fail" $ fail (Set.singleton (SomeFailure FailureEnd)))
247 negLook (Program x) = Program $ \next ->
248 liftM (comment "negLook") $
249 liftM2 (catch ExceptionFailure)
250 -- On x success, discard the result,
251 -- and replace this 'Catcher' by a failure whose 'farthestExpecting' is negated,
252 -- then a failure is raised from the input
253 -- when entering 'negLook', to avoid odd cases:
254 -- - where the failure that made (negLook x)
255 -- succeed can get the blame for the overall
256 -- failure of the grammar.
257 -- - where the overall failure of
258 -- the grammar might be blamed on something in x
259 -- that, if corrected, still makes x succeed
260 -- and (negLook x) fail.
261 (
262 liftM (comment "negLook.ahead") $
263 liftM saveInput $ x $
264 popValue $ commit ExceptionFailure $
265 loadInput $ fail Set.empty
266 )
267 -- On x failure, reset the input,
268 -- and go on with the next 'Instr'uctions.
269 (
270 liftM (comment "negLook.reset") $
271 return $ loadInput $ pushValue Prod.unit next
272 )
273 instance
274 ( InstrBranchable repr
275 , InstrJoinable repr
276 ) => CombMatchable (Program repr inp) where
277 conditional (Program a) bs (Program d) = joinNext $ Program $ \next -> do
278 bs' <- Control.Monad.sequence $ (\(p, b) -> (prodCode p,) Functor.<$> unProgram b next) Functor.<$> bs
279 a =<< liftM (choicesBranch bs') (d next)
280 instance
281 ( tok ~ InputToken inp
282 , InstrReadable tok repr
283 , InstrComment repr
284 , Typeable tok
285 ) => CombSatisfiable tok (Program repr inp) where
286 satisfyOrFail fs p = Program $ \next ->
287 return $
288 comment ("satisfy "<>showsPrec 11 (prodCode p) "") $
289 read fs (prodCode p) next
290 instance
291 ( InstrBranchable repr
292 , InstrJoinable repr
293 , InstrValuable repr
294 ) => CombSelectable (Program repr inp) where
295 branch (Program lr) (Program l) (Program r) = joinNext $ Program $ \next ->
296 lr =<< liftM2 caseBranch
297 (l (swapValue (applyValue next)))
298 (r (swapValue (applyValue next)))
299 instance
300 ( InstrValuable repr
301 , InstrRegisterable repr
302 ) => CombRegisterable (Program repr inp) where
303 new (Program p) k = Program $ \next -> do
304 !regName <- TH.newName "reg"
305 p =<< liftM (newRegister (UnscopedRegister regName))
306 (unProgram (k (Register (UnscopedRegister regName))) next)
307 get (Register r) = Program $ \next ->
308 return $ readRegister r next
309 put (Register r) (Program k) = Program $ \next ->
310 k $ writeRegister r $ pushValue Prod.unit next
311 instance
312 ( InstrValuable repr
313 , InstrRegisterable repr
314 ) => CombRegisterableUnscoped (Program repr inp) where
315 newUnscoped r (Program p) k = Program $ \next ->
316 p =<< liftM (newRegister r) (unProgram k next)
317 getUnscoped r = Program $ return . readRegister r
318 putUnscoped r (Program k) = Program $
319 k . writeRegister r . pushValue Prod.unit