]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Generate.hs
bug: a ref outside its def must be supported
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Generate.hs
1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE StandaloneDeriving #-} -- For Show (ParsingError inp)
3 {-# LANGUAGE TemplateHaskell #-}
4 {-# LANGUAGE UnboxedTuples #-} -- For nextInput
5 {-# LANGUAGE UndecidableInstances #-} -- For Show (ParsingError inp)
6 module Symantic.Parser.Machine.Generate where
7
8 import Control.Monad (Monad(..))
9 import Data.Bool (Bool)
10 import Data.Char (Char)
11 import Data.Either (Either(..))
12 import Data.Function (($), (.))
13 import Data.Functor ((<$>))
14 import Data.Int (Int)
15 import Data.List (minimum)
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Map (Map)
18 import Data.Maybe (Maybe(..))
19 import Data.Monoid (Monoid(..))
20 import Data.Ord (Ord(..), Ordering(..))
21 import Data.Semigroup (Semigroup(..))
22 import Data.Set (Set)
23 import Data.String (String)
24 import Language.Haskell.TH (CodeQ, Code(..))
25 import Prelude ((+), (-))
26 import Text.Show (Show(..))
27 import GHC.TypeLits (symbolVal)
28 import qualified Data.List.NonEmpty as NE
29 import qualified Data.Map.Internal as Map_
30 import qualified Data.Map.Strict as Map
31 import qualified Data.Set as Set
32 import qualified Language.Haskell.TH as TH
33 import qualified Language.Haskell.TH.Syntax as TH
34
35 import Symantic.Univariant.Trans
36 import Symantic.Parser.Grammar.Combinators (ErrorItem(..))
37 import Symantic.Parser.Machine.Input
38 import Symantic.Parser.Machine.Instructions
39 import qualified Symantic.Parser.Haskell as H
40
41 import Debug.Trace (trace)
42
43 genCode :: TermInstr a -> CodeQ a
44 genCode = trans
45
46 -- * Type 'Gen'
47 -- | Generate the 'CodeQ' parsing the input.
48 data Gen inp vs a = Gen
49 { minHorizon :: Map TH.Name Horizon -> Horizon
50 -- ^ Synthetized (bottom-up) minimal input length
51 -- required by the parser to not fail.
52 -- This requires a 'minHorizonByName'
53 -- containing the minimal 'Horizon's of all the 'TH.Name's
54 -- this parser 'call's, 'jump's or 'refJoin's to.
55 , exceptions :: Map TH.Name (Map ErrorLabel ()) -> Map ErrorLabel ()
56 , unGen ::
57 GenCtx inp vs a ->
58 CodeQ (Either (ParsingError inp) a)
59 }
60
61 -- ** Type 'ParsingError'
62 data ParsingError inp
63 = ParsingErrorStandard
64 { parsingErrorOffset :: Offset
65 -- | Note that if an 'ErrorItemHorizon' greater than 1
66 -- is amongst the 'parsingErrorExpecting'
67 -- then this is only the 'InputToken'
68 -- at the begining of the expected 'Horizon'.
69 , parsingErrorUnexpected :: Maybe (InputToken inp)
70 , parsingErrorExpecting :: Set (ErrorItem (InputToken inp))
71 }
72 deriving instance Show (InputToken inp) => Show (ParsingError inp)
73
74 -- ** Type 'ErrorLabel'
75 type ErrorLabel = String
76
77 -- ** Type 'Offset'
78 type Offset = Int
79
80 -- ** Type 'Horizon'
81 -- | Synthetized minimal input length
82 -- required for a successful parsing.
83 -- Used with 'checkedHorizon' to factorize input length checks,
84 -- instead of checking the input length
85 -- one 'InputToken' at a time at each 'read'.
86 type Horizon = Offset
87
88 -- ** Type 'Cont'
89 type Cont inp v a =
90 {-farthestInput-}Cursor inp ->
91 {-farthestExpecting-}[ErrorItem (InputToken inp)] ->
92 v ->
93 Cursor inp ->
94 Either (ParsingError inp) a
95
96 {-
97 -- *** Type 'FarthestError'
98 data FarthestError inp = FarthestError
99 { farthestInput :: Cursor inp
100 , farthestExpecting :: [ErrorItem (InputToken inp)]
101 }
102 -}
103
104 -- | @('generateCode' input mach)@ generates @TemplateHaskell@ code
105 -- parsing the given 'input' according to the given 'Machine'.
106 generateCode ::
107 Ord (InputToken inp) =>
108 Show (InputToken inp) =>
109 TH.Lift (InputToken inp) =>
110 -- InputToken inp ~ Char =>
111 Input inp =>
112 Show (Cursor inp) =>
113 Gen inp '[] a ->
114 CodeQ (inp -> Either (ParsingError inp) a)
115 generateCode k = [|| \(input :: inp) ->
116 -- Pattern bindings containing unlifted types
117 -- should use an outermost bang pattern.
118 let !(# init, readMore, readNext #) = $$(cursorOf [||input||]) in
119 let finalRet = \_farInp _farExp v _inp -> Right v in
120 let finalRaise :: forall b. (Catcher inp b)
121 = \_failInp !farInp !farExp ->
122 Left ParsingErrorStandard
123 { parsingErrorOffset = offset farInp
124 , parsingErrorUnexpected =
125 if readMore farInp
126 then Just (let (# c, _ #) = readNext farInp in c)
127 else Nothing
128 , parsingErrorExpecting = Set.fromList farExp
129 } in
130 $$(unGen k GenCtx
131 { valueStack = ValueStackEmpty
132 , catchStackByLabel = Map.empty
133 , defaultCatch = [||finalRaise||]
134 , retCode = [||finalRet||]
135 , input = [||init||]
136 , nextInput = [||readNext||]
137 , moreInput = [||readMore||]
138 -- , farthestError = [||Nothing||]
139 , farthestInput = [||init||]
140 , farthestExpecting = [|| [] ||]
141 , checkedHorizon = 0
142 , minHorizonByName = Map.empty
143 , exceptionsByName = Map.empty
144 })
145 ||]
146
147 -- ** Type 'GenCtx'
148 -- | This is an inherited (top-down) context
149 -- only present at compile-time, to build TemplateHaskell splices.
150 data GenCtx inp vs a =
151 ( TH.Lift (InputToken inp)
152 , Cursorable (Cursor inp)
153 , Show (InputToken inp)
154 ) => GenCtx
155 { valueStack :: ValueStack vs
156 , catchStackByLabel :: Map ErrorLabel (NonEmpty (CodeQ (Catcher inp a)))
157 -- | Default 'Catcher' defined at the begining of the generated 'CodeQ',
158 -- hence a constant within the 'Gen'eration.
159 , defaultCatch :: forall b. CodeQ (Catcher inp b)
160 , retCode :: CodeQ (Cont inp a a)
161 , input :: CodeQ (Cursor inp)
162 , moreInput :: CodeQ (Cursor inp -> Bool)
163 , nextInput :: CodeQ (Cursor inp -> (# InputToken inp, Cursor inp #))
164 , farthestInput :: CodeQ (Cursor inp)
165 , farthestExpecting :: CodeQ [ErrorItem (InputToken inp)]
166 -- | Remaining horizon already checked.
167 -- Updated by 'checkHorizon'
168 -- and reset elsewhere when needed.
169 , checkedHorizon :: Horizon
170 -- | Minimal horizon for each 'defLet' or 'defJoin'.
171 -- This can be done as an inherited attribute because
172 -- 'OverserveSharing' introduces 'def' as an ancestor node
173 -- of all the 'ref's pointing to it.
174 -- Same for 'defJoin' and its 'refJoin's.
175 , minHorizonByName :: Map TH.Name Horizon
176 , exceptionsByName :: Map TH.Name (Map ErrorLabel ())
177 }
178
179 -- ** Type 'ValueStack'
180 data ValueStack vs where
181 ValueStackEmpty :: ValueStack '[]
182 ValueStackCons ::
183 { valueStackHead :: TermInstr v
184 , valueStackTail :: ValueStack vs
185 } -> ValueStack (v ': vs)
186
187 instance InstrValuable Gen where
188 pushValue x k = k
189 { unGen = \ctx -> unGen k ctx
190 { valueStack = ValueStackCons x (valueStack ctx) }
191 }
192 popValue k = k
193 { unGen = \ctx -> unGen k ctx
194 { valueStack = valueStackTail (valueStack ctx) }
195 }
196 lift2Value f k = k
197 { unGen = \ctx -> unGen k ctx
198 { valueStack =
199 let ValueStackCons y (ValueStackCons x xs) = valueStack ctx in
200 ValueStackCons (f H.:@ x H.:@ y) xs
201 }
202 }
203 swapValue k = k
204 { unGen = \ctx -> unGen k ctx
205 { valueStack =
206 let ValueStackCons y (ValueStackCons x xs) = valueStack ctx in
207 ValueStackCons x (ValueStackCons y xs)
208 }
209 }
210 instance InstrBranchable Gen where
211 caseBranch kx ky = Gen
212 { minHorizon = \hs -> minHorizon kx hs `min` minHorizon ky hs
213 , exceptions = \hs -> exceptions kx hs <> exceptions ky hs
214 , unGen = \ctx ->
215 let ValueStackCons v vs = valueStack ctx in
216 [||
217 case $$(genCode v) of
218 Left x -> $$(unGen kx ctx{ valueStack = ValueStackCons (H.Term [||x||]) vs })
219 Right y -> $$(unGen ky ctx{ valueStack = ValueStackCons (H.Term [||y||]) vs })
220 ||]
221 }
222 choicesBranch fs ks kd = Gen
223 { minHorizon = \hs -> minimum $
224 minHorizon kd hs :
225 (($ hs) . minHorizon <$> ks)
226 , exceptions = \hs -> mconcat $
227 exceptions kd hs :
228 (($ hs) . exceptions <$> ks)
229 , unGen = \ctx ->
230 let ValueStackCons v vs = valueStack ctx in
231 go ctx{valueStack = vs} v fs ks
232 }
233 where
234 go ctx x (f:fs') (k:ks') = [||
235 if $$(genCode (f H.:@ x))
236 then $$(unGen k ctx)
237 else $$(go ctx x fs' ks')
238 ||]
239 go ctx _ _ _ = unGen kd ctx
240 instance InstrExceptionable Gen where
241 raiseException lbl failExp = Gen
242 { minHorizon = \_hs -> 0
243 , exceptions = \_hs -> Map.singleton (symbolVal lbl) ()
244 , unGen = \ctx@GenCtx{} -> [||
245 let (# farInp, farExp #) =
246 case $$compareOffset $$(farthestInput ctx) $$(input ctx) of
247 LT -> (# $$(input ctx), failExp #)
248 EQ -> (# $$(farthestInput ctx), $$(farthestExpecting ctx) <> failExp #)
249 GT -> (# $$(farthestInput ctx), $$(farthestExpecting ctx) #) in
250 $$(NE.head (Map.findWithDefault (NE.singleton (defaultCatch ctx)) (symbolVal lbl) (catchStackByLabel ctx)))
251 $$(input ctx) farInp farExp
252 ||]
253 }
254 popException lbl k = k
255 { unGen = \ctx ->
256 unGen k ctx{catchStackByLabel = Map.update (\case
257 _r0:|(r1:rs) -> Just (r1:|rs)
258 _ -> Nothing
259 ) (symbolVal lbl) (catchStackByLabel ctx)
260 }
261 }
262 catchException lbl ok ko = Gen
263 { minHorizon = \hs -> minHorizon ok hs `min` minHorizon ko hs
264 , exceptions = \hs -> exceptions ok hs <> exceptions ko hs
265 , unGen = \ctx@GenCtx{} -> [||
266 let _ = $$(TH.liftTyped ("catchException lbl="<>symbolVal lbl)) in
267 $$(unGen ok ctx
268 { catchStackByLabel = Map.insertWith (<>) (symbolVal lbl)
269 (NE.singleton ([|| \ !failInp !farInp !farExp ->
270 $$(unGen ko ctx
271 -- PushValue the input as it was when entering the catchFail.
272 { valueStack = ValueStackCons (H.Term (input ctx)) (valueStack ctx)
273 -- Note that 'catchStackByLabel' is reset.
274 -- Move the input to the failing position.
275 , input = [||failInp||]
276 -- Set the farthestInput to the farthest computed by 'fail'
277 , farthestInput = [||farInp||]
278 , farthestExpecting = [||farExp||]
279 })
280 ||])) (catchStackByLabel ctx)
281 }
282 ) ||]
283 }
284 -- ** Type 'Catcher'
285 type Catcher inp a =
286 {-failureInput-}Cursor inp ->
287 {-farthestInput-}Cursor inp ->
288 {-farthestExpecting-}[ErrorItem (InputToken inp)] ->
289 Either (ParsingError inp) a
290 instance InstrInputable Gen where
291 loadInput k = k
292 { unGen = \ctx ->
293 let ValueStackCons input vs = valueStack ctx in
294 unGen k ctx
295 { valueStack = vs
296 , input = genCode input
297 , checkedHorizon = 0
298 }
299 }
300 pushInput k = k
301 { unGen = \ctx ->
302 unGen k ctx{valueStack = ValueStackCons (H.Term (input ctx)) (valueStack ctx)}
303 }
304 instance InstrLetable Gen where
305 defLet (LetName n) sub k = k
306 { unGen = \ctx@GenCtx{} -> Code $ TH.unsafeTExpCoerce $ do
307 -- 'sub' is recursively 'call'able within 'sub',
308 -- but its maximal 'minHorizon' is not known yet.
309 let minHorizonByNameButSub = Map.insert n 0 (minHorizonByName ctx)
310 let raiseLabelsByNameButSub = Map.insert n Map.empty (exceptionsByName ctx)
311 body <- TH.unTypeQ $ TH.examineCode $ [|| -- buildRec in Parsley
312 -- Called by 'call' or 'jump'.
313 \ !ok{-from generateSuspend or retCode-}
314 !inp
315 !koByLabel{- 'catchStackByLabel' from the 'call'-site -} ->
316 $$(unGen sub ctx
317 { valueStack = ValueStackEmpty
318 -- Build a 'catchStackByLabel' from the one available at the 'call'-site.
319 -- Note that all the 'exceptions' of the 'sub'routine may not be available,
320 -- hence 'Map.findWithDefault' is used instead of 'Map.!'.
321 , catchStackByLabel = Map.mapWithKey
322 (\lbl () -> NE.singleton [||Map.findWithDefault $$(defaultCatch ctx) lbl koByLabel||])
323 (exceptions sub raiseLabelsByNameButSub)
324 , input = [||inp||]
325 , retCode = [||ok||]
326
327 -- These are passed by the caller via 'ok' or 'ko'
328 -- , farthestInput =
329 -- , farthestExpecting =
330
331 -- Some callers can call this 'defLet'
332 -- with zero 'checkedHorizon', hence use this minimum.
333 -- TODO: maybe it could be improved a bit
334 -- by taking the minimum of the checked horizons
335 -- before all the 'call's and 'jump's to this 'defLet'.
336 , checkedHorizon = 0
337 , minHorizonByName = minHorizonByNameButSub
338 , exceptionsByName = raiseLabelsByNameButSub
339 })
340 ||]
341 let decl = TH.FunD n [TH.Clause [] (TH.NormalB body) []]
342 expr <- TH.unTypeQ (TH.examineCode (unGen k ctx
343 { minHorizonByName =
344 -- 'sub' is 'call'able within 'k'.
345 Map.insert n
346 (minHorizon sub minHorizonByNameButSub)
347 (minHorizonByName ctx)
348 , exceptionsByName =
349 Map.insert n
350 (exceptions sub raiseLabelsByNameButSub)
351 (exceptionsByName ctx)
352 }))
353 return (TH.LetE [decl] expr)
354 }
355 jump (LetName n) = Gen
356 { minHorizon = (Map.! n)
357 , exceptions = (Map.! n)
358 , unGen = \ctx -> [||
359 let _ = "jump" in
360 $$(Code (TH.unsafeTExpCoerce (return (TH.VarE n))))
361 {-ok-}$$(retCode ctx)
362 $$(input ctx)
363 $$(liftTypedRaiseByLabel $
364 catchStackByLabel ctx
365 -- Pass only the labels raised by the 'defLet'.
366 `Map.intersection`
367 (exceptionsByName ctx Map.! n)
368 )
369 ||]
370 }
371 call (LetName n) k = k
372 { minHorizon = (Map.! n)
373 , exceptions = (Map.! n)
374 , unGen = \ctx -> let ks = (Map.keys (catchStackByLabel ctx)) in [||
375 let _ = $$(TH.liftTyped $ "call exceptionsByName("<>show n<>")="<>show (Map.keys (Map.findWithDefault Map.empty n (exceptionsByName ctx))) <> " catchStackByLabel(ctx)="<> show ks) in
376 $$(Code (TH.unsafeTExpCoerce (return (TH.VarE n))))
377 {-ok-}$$(generateSuspend k ctx)
378 $$(input ctx)
379 $$(liftTypedRaiseByLabel $
380 catchStackByLabel ctx
381 -- Pass only the labels raised by the 'defLet'.
382 `Map.intersection`
383 (exceptionsByName ctx Map.! n)
384 )
385 ||]
386 }
387 ret = Gen
388 { minHorizon = \_hs -> 0
389 , exceptions = \_hs -> Map.empty
390 , unGen = \ctx -> unGen (generateResume (retCode ctx)) ctx
391 }
392
393 -- | Like 'TH.liftTyped' but adjusted to work on 'catchStackByLabel'
394 -- which already contains 'CodeQ' terms.
395 -- Moreover, only the 'Catcher' at the top of the stack
396 -- is needed and thus generated in the resulting 'CodeQ'.
397 --
398 -- TODO: Use an 'Array' instead of a 'Map'?
399 liftTypedRaiseByLabel :: TH.Lift k => Map k (NonEmpty (CodeQ a)) -> CodeQ (Map k a)
400 liftTypedRaiseByLabel Map_.Tip = [|| Map_.Tip ||]
401 liftTypedRaiseByLabel (Map_.Bin s k (h:|_hs) l r) =
402 [|| Map_.Bin s k $$h $$(liftTypedRaiseByLabel l) $$(liftTypedRaiseByLabel r) ||]
403
404 -- | Generate a 'retCode' 'Cont'inuation to be called with 'generateResume'.
405 -- Used when 'call' 'ret'urns.
406 -- The return 'v'alue is 'pushValue'ed on the 'valueStack'.
407 generateSuspend ::
408 {-k-}Gen inp (v ': vs) a ->
409 GenCtx inp vs a ->
410 CodeQ (Cont inp v a)
411 generateSuspend k ctx = [||
412 let _ = $$(TH.liftTyped $ "suspend raiseException=" <> show (exceptionsByName ctx)) in
413 \farInp farExp v !inp ->
414 $$(unGen k ctx
415 { valueStack = ValueStackCons (H.Term [||v||]) (valueStack ctx)
416 , input = [||inp||]
417 , farthestInput = [||farInp||]
418 , farthestExpecting = [||farExp||]
419 , checkedHorizon = 0
420 }
421 )
422 ||]
423
424 -- | Generate a call to the 'generateSuspend' continuation.
425 -- Used when 'call' 'ret'urns.
426 generateResume ::
427 CodeQ (Cont inp v a) ->
428 Gen inp (v ': vs) a
429 generateResume k = Gen
430 { minHorizon = \_hs -> 0
431 , exceptions = \_hs -> Map.empty
432 , unGen = \ctx -> [||
433 let _ = "resume" in
434 $$k
435 $$(farthestInput ctx)
436 $$(farthestExpecting ctx)
437 (let _ = "resume.genCode" in $$(genCode (valueStackHead (valueStack ctx))))
438 $$(input ctx)
439 ||]
440 }
441
442 instance InstrJoinable Gen where
443 defJoin (LetName n) joined k = k
444 { unGen = \ctx -> Code $ TH.unsafeTExpCoerce $ do
445 body <- TH.unTypeQ $ TH.examineCode $ [||
446 -- Called by 'generateResume'.
447 \farInp farExp v !inp ->
448 $$(unGen joined ctx
449 { valueStack = ValueStackCons (H.Term [||v||]) (valueStack ctx)
450 , input = [||inp||]
451 , farthestInput = [||farInp||]
452 , farthestExpecting = [||farExp||]
453 , checkedHorizon = 0
454 {- FIXME:
455 , catchStackByLabel = Map.mapWithKey
456 (\lbl () -> NE.singleton [||koByLabel Map.! lbl||])
457 (exceptions joined raiseLabelsByNameButSub)
458 -}
459 })
460 ||]
461 let decl = TH.FunD n [TH.Clause [] (TH.NormalB body) []]
462 expr <- TH.unTypeQ (TH.examineCode (unGen k ctx
463 { minHorizonByName =
464 -- 'joined' is 'refJoin'able within 'k'.
465 Map.insert n
466 -- By definition (in 'joinNext')
467 -- 'joined' is not recursively 'refJoin'able within 'joined',
468 -- hence no need to prevent against recursivity
469 -- as has to be done in 'defLet'.
470 (minHorizon joined (minHorizonByName ctx))
471 (minHorizonByName ctx)
472 , exceptionsByName =
473 Map.insert n
474 (exceptions joined (exceptionsByName ctx))
475 (exceptionsByName ctx)
476 }))
477 return (TH.LetE [decl] expr)
478 }
479 refJoin (LetName n) = (generateResume (Code (TH.unsafeTExpCoerce (return (TH.VarE n)))))
480 { minHorizon = (Map.! n)
481 , exceptions = (Map.! n)
482 }
483 instance InstrReadable Char Gen where
484 read farExp p = checkHorizon . checkToken farExp p
485
486 checkHorizon ::
487 TH.Lift (InputToken inp) =>
488 {-ok-}Gen inp vs a ->
489 Gen inp vs a
490 checkHorizon ok = ok
491 { minHorizon = \hs -> 1 + minHorizon ok hs
492 , exceptions = \hs -> Map.insert "fail" () $ exceptions ok hs
493 , unGen = \ctx0@GenCtx{} ->
494 let raiseByLbl =
495 NE.head (Map.findWithDefault (NE.singleton (defaultCatch ctx0)) "fail" (catchStackByLabel ctx0)) in
496 [||
497 -- Factorize failure code
498 let readFail = $$(raiseByLbl) in
499 $$(
500 let ctx = ctx0{catchStackByLabel = Map.adjust (\(_r:|rs) -> [||readFail||] :| rs) "fail" (catchStackByLabel ctx0)} in
501 if checkedHorizon ctx >= 1
502 then unGen ok ctx0{checkedHorizon = checkedHorizon ctx - 1}
503 else let minHoriz = minHorizon ok (minHorizonByName ctx) in
504 [||
505 if $$(moreInput ctx)
506 $$(if minHoriz > 0
507 then [||$$shiftRight minHoriz $$(input ctx)||]
508 else input ctx)
509 then $$(unGen ok ctx{checkedHorizon = minHoriz})
510 else let _ = "checkHorizon.else" in
511 -- TODO: return a resuming continuation (eg. Partial)
512 $$(unGen (fail [ErrorItemHorizon (minHoriz + 1)]) ctx)
513 ||]
514 )
515 ||]
516 }
517
518 checkToken ::
519 Ord (InputToken inp) =>
520 TH.Lift (InputToken inp) =>
521 [ErrorItem (InputToken inp)] ->
522 {-predicate-}TermInstr (InputToken inp -> Bool) ->
523 {-ok-}Gen inp (InputToken inp ': vs) a ->
524 Gen inp vs a
525 checkToken farExp p ok = ok
526 { exceptions = \hs -> Map.insert "fail" () $ exceptions ok hs
527 , unGen = \ctx -> [||
528 let !(# c, cs #) = $$(nextInput ctx) $$(input ctx) in
529 if $$(genCode p) c
530 then $$(unGen ok ctx
531 { valueStack = ValueStackCons (H.Term [||c||]) (valueStack ctx)
532 , input = [||cs||]
533 })
534 else let _ = "checkToken.else" in $$(unGen (fail farExp) ctx)
535 ||]
536 }