]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Generate.hs
machine: simplify a bit the horizon checking
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Generate.hs
1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE StandaloneDeriving #-} -- For Show (ParsingError inp)
3 {-# LANGUAGE TemplateHaskell #-}
4 {-# LANGUAGE UnboxedTuples #-} -- For nextInput
5 {-# LANGUAGE UndecidableInstances #-} -- For Show (ParsingError inp)
6 module Symantic.Parser.Machine.Generate where
7
8 import Control.Monad (Monad(..))
9 import Data.Bool (Bool)
10 import Data.Char (Char)
11 import Data.Either (Either(..))
12 import Data.Function (($), (.))
13 import Data.Functor ((<$>))
14 import Data.Int (Int)
15 import Data.List (minimum)
16 import Data.Map (Map)
17 import Data.Maybe (Maybe(..))
18 import Data.Ord (Ord(..), Ordering(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Set (Set)
21 import Language.Haskell.TH (CodeQ, Code(..))
22 import Prelude ((+), (-))
23 import Text.Show (Show(..))
24 import qualified Data.Map.Strict as Map
25 import qualified Data.Set as Set
26 import qualified Language.Haskell.TH.Syntax as TH
27 -- import qualified Control.Monad.Trans.Writer as Writer
28
29 import Symantic.Univariant.Trans
30 import Symantic.Parser.Grammar.Combinators (ErrorItem(..))
31 import Symantic.Parser.Machine.Input
32 import Symantic.Parser.Machine.Instructions
33 import qualified Symantic.Parser.Haskell as H
34
35 genCode :: TermInstr a -> CodeQ a
36 genCode = trans
37
38 -- * Type 'Gen'
39 -- | Generate the 'CodeQ' parsing the input.
40 data Gen inp vs es a = Gen
41 { minHorizon :: Map TH.Name Horizon -> Horizon
42 -- ^ Synthetized (bottom-up) minimal input length
43 -- required by the parser to not fail.
44 -- This requires a 'minHorizonByName'
45 -- containing the minimal 'Horizon's of all the 'TH.Name's
46 -- this parser 'call's, 'jump's or 'refJoin's to.
47 , unGen ::
48 GenCtx inp vs es a ->
49 CodeQ (Either (ParsingError inp) a)
50 }
51
52 -- ** Type 'ParsingError'
53 data ParsingError inp
54 = ParsingErrorStandard
55 { parsingErrorOffset :: Offset
56 -- | Note that if an 'ErrorItemHorizon' greater than 1
57 -- is amongst the 'parsingErrorExpecting'
58 -- then this is only the 'InputToken'
59 -- at the begining of the expected 'Horizon'.
60 , parsingErrorUnexpected :: Maybe (InputToken inp)
61 , parsingErrorExpecting :: Set (ErrorItem (InputToken inp))
62 }
63 deriving instance Show (InputToken inp) => Show (ParsingError inp)
64
65 -- ** Type 'Offset'
66 type Offset = Int
67
68 -- ** Type 'Horizon'
69 -- | Synthetized minimal input length
70 -- required for a successful parsing.
71 -- Used with 'checkedHorizon' to factorize input length checks,
72 -- instead of checking the input length
73 -- one 'InputToken' at a time at each 'read'.
74 type Horizon = Offset
75
76 -- ** Type 'Cont'
77 type Cont inp v a =
78 {-farthestInput-}Cursor inp ->
79 {-farthestExpecting-}[ErrorItem (InputToken inp)] ->
80 v ->
81 Cursor inp ->
82 Either (ParsingError inp) a
83
84 -- ** Type 'FailHandler'
85 type FailHandler inp a =
86 {-failureInput-}Cursor inp ->
87 {-farthestInput-}Cursor inp ->
88 {-farthestExpecting-}[ErrorItem (InputToken inp)] ->
89 Either (ParsingError inp) a
90
91 {-
92 -- *** Type 'FarthestError'
93 data FarthestError inp = FarthestError
94 { farthestInput :: Cursor inp
95 , farthestExpecting :: [ErrorItem (InputToken inp)]
96 }
97 -}
98
99 -- | @('generateCode' input mach)@ generates @TemplateHaskell@ code
100 -- parsing the given 'input' according to the given 'Machine'.
101 generateCode ::
102 forall inp ret.
103 Ord (InputToken inp) =>
104 Show (InputToken inp) =>
105 TH.Lift (InputToken inp) =>
106 -- InputToken inp ~ Char =>
107 Input inp =>
108 CodeQ inp ->
109 Show (Cursor inp) =>
110 Gen inp '[] ('Succ 'Zero) ret ->
111 CodeQ (Either (ParsingError inp) ret)
112 generateCode input k = [||
113 -- Pattern bindings containing unlifted types
114 -- should use an outermost bang pattern.
115 let !(# init, readMore, readNext #) = $$(cursorOf input) in
116 let finalRet = \_farInp _farExp v _inp -> Right v in
117 let finalFail _failInp !farInp !farExp =
118 Left ParsingErrorStandard
119 { parsingErrorOffset = offset farInp
120 , parsingErrorUnexpected =
121 if readMore farInp
122 then Just (let (# c, _ #) = readNext farInp in c)
123 else Nothing
124 , parsingErrorExpecting = Set.fromList farExp
125 } in
126 $$(unGen k GenCtx
127 { valueStack = ValueStackEmpty
128 , failStack = FailStackCons [||finalFail||] FailStackEmpty
129 , retCode = [||finalRet||]
130 , input = [||init||]
131 , nextInput = [||readNext||]
132 , moreInput = [||readMore||]
133 -- , farthestError = [||Nothing||]
134 , farthestInput = [||init||]
135 , farthestExpecting = [|| [] ||]
136 , checkedHorizon = 0
137 , minHorizonByName = Map.empty
138 })
139 ||]
140
141 -- ** Type 'GenCtx'
142 -- | This is an inherited (top-down) context
143 -- only present at compile-time, to build TemplateHaskell splices.
144 data GenCtx inp vs (es::Peano) a =
145 ( TH.Lift (InputToken inp)
146 , Cursorable (Cursor inp)
147 , Show (InputToken inp)
148 -- , InputToken inp ~ Char
149 ) => GenCtx
150 { valueStack :: ValueStack vs
151 , failStack :: FailStack inp a es
152 --, failStacks :: FailStack inp es a
153 , retCode :: CodeQ (Cont inp a a)
154 , input :: CodeQ (Cursor inp)
155 , moreInput :: CodeQ (Cursor inp -> Bool)
156 , nextInput :: CodeQ (Cursor inp -> (# InputToken inp, Cursor inp #))
157 , farthestInput :: CodeQ (Cursor inp)
158 , farthestExpecting :: CodeQ [ErrorItem (InputToken inp)]
159 -- | Remaining horizon already checked.
160 -- Updated by 'checkHorizon'
161 -- and reset elsewhere when needed.
162 , checkedHorizon :: Offset
163 -- | Minimal horizon for each 'subroutine' or 'defJoin'.
164 -- This can be done as an inherited attribute because
165 -- 'OverserveSharing' introduces 'def' as an ancestor node
166 -- of all the 'ref's pointing to it.
167 -- Same for 'defJoin' and its 'refJoin's.
168 , minHorizonByName :: Map TH.Name Offset
169 }
170
171 -- ** Type 'ValueStack'
172 data ValueStack vs where
173 ValueStackEmpty :: ValueStack '[]
174 ValueStackCons ::
175 { valueStackHead :: TermInstr v
176 , valueStackTail :: ValueStack vs
177 } -> ValueStack (v ': vs)
178
179 -- ** Type 'FailStack'
180 data FailStack inp a es where
181 FailStackEmpty :: FailStack inp a 'Zero
182 FailStackCons ::
183 { failStackHead :: CodeQ (FailHandler inp a)
184 , failStackTail :: FailStack inp a es
185 } ->
186 FailStack inp a ('Succ es)
187
188 instance Stackable Gen where
189 push x k = k
190 { unGen = \ctx -> unGen k ctx
191 { valueStack = ValueStackCons x (valueStack ctx) }
192 }
193 pop k = k
194 { unGen = \ctx -> unGen k ctx
195 { valueStack = valueStackTail (valueStack ctx) }
196 }
197 liftI2 f k = k
198 { unGen = \ctx -> unGen k ctx
199 { valueStack =
200 let ValueStackCons y (ValueStackCons x xs) = valueStack ctx in
201 ValueStackCons (f H.:@ x H.:@ y) xs
202 }
203 }
204 swap k = k
205 { unGen = \ctx -> unGen k ctx
206 { valueStack =
207 let ValueStackCons y (ValueStackCons x xs) = valueStack ctx in
208 ValueStackCons x (ValueStackCons y xs)
209 }
210 }
211 instance Branchable Gen where
212 caseI kx ky = Gen
213 { minHorizon = \ls ->
214 minHorizon kx ls `min` minHorizon ky ls
215 , unGen = \ctx ->
216 let ValueStackCons v vs = valueStack ctx in
217 [||
218 case $$(genCode v) of
219 Left x -> $$(unGen kx ctx{ valueStack = ValueStackCons (H.Term [||x||]) vs })
220 Right y -> $$(unGen ky ctx{ valueStack = ValueStackCons (H.Term [||y||]) vs })
221 ||]
222 }
223 choices fs ks kd = Gen
224 { minHorizon = \hs -> minimum $
225 minHorizon kd hs :
226 (($ hs) . minHorizon <$> ks)
227 , unGen = \ctx ->
228 let ValueStackCons v vs = valueStack ctx in
229 go ctx{valueStack = vs} v fs ks
230 }
231 where
232 go ctx x (f:fs') (k:ks') = [||
233 if $$(genCode (f H.:@ x))
234 then $$(unGen k ctx)
235 else $$(go ctx x fs' ks')
236 ||]
237 go ctx _ _ _ = unGen kd ctx
238 instance Failable Gen where
239 fail failExp = Gen
240 { minHorizon = \_hs -> 0
241 , unGen = \ctx@GenCtx{} -> [||
242 let (# farInp, farExp #) =
243 case $$compareOffset $$(farthestInput ctx) $$(input ctx) of
244 LT -> (# $$(input ctx), failExp #)
245 EQ -> (# $$(farthestInput ctx), ($$(farthestExpecting ctx) <> failExp) #)
246 GT -> (# $$(farthestInput ctx), $$(farthestExpecting ctx) #) in
247 $$(failStackHead (failStack ctx))
248 $$(input ctx) farInp farExp
249 ||]
250 }
251 popFail k = k
252 { unGen = \ctx ->
253 unGen k ctx{failStack = failStackTail (failStack ctx)}
254 }
255 catchFail ok ko = Gen
256 { minHorizon = \ls -> minHorizon ok ls `min` minHorizon ko ls
257 , unGen = \ctx@GenCtx{} -> unGen ok ctx
258 { failStack = FailStackCons [|| \ !failInp !farInp !farExp ->
259 -- trace ("catchFail: " <> "farExp="<>show farExp) $
260 $$(unGen ko ctx
261 -- Push the input as it was when entering the catchFail.
262 { valueStack = ValueStackCons (H.Term (input ctx)) (valueStack ctx)
263 -- Move the input to the failing position.
264 , input = [||failInp||]
265 -- Set the farthestInput to the farthest computed by 'fail'
266 , farthestInput = [||farInp||]
267 , farthestExpecting = [||farExp||]
268 })
269 ||] (failStack ctx)
270 }
271 }
272 instance Inputable Gen where
273 loadInput k = k
274 { unGen = \ctx ->
275 let ValueStackCons input vs = valueStack ctx in
276 unGen k ctx
277 { valueStack = vs
278 , input = genCode input
279 , checkedHorizon = 0
280 }
281 }
282 pushInput k = k
283 { unGen = \ctx ->
284 unGen k ctx{valueStack = ValueStackCons (H.Term (input ctx)) (valueStack ctx)}
285 }
286 instance Routinable Gen where
287 subroutine (LetName n) sub k = Gen
288 { minHorizon = minHorizon k
289 , unGen = \ctx -> Code $ TH.unsafeTExpCoerce $ do
290 -- 'sub' is recursively 'call'able within 'sub',
291 -- but its maximal 'minHorizon' is not known yet.
292 let minHorizonByNameButSub = Map.insert n 0 (minHorizonByName ctx)
293 body <- TH.unTypeQ $ TH.examineCode $ [|| -- buildRec in Parsley
294 -- subroutine called by 'call' or 'jump'
295 \ !ok{-from generateSuspend or retCode-}
296 !inp
297 !ko{-from failStackHead-} ->
298 $$(unGen sub ctx
299 { valueStack = ValueStackEmpty
300 , failStack = FailStackCons [||ko||] FailStackEmpty
301 , input = [||inp||]
302 , retCode = [||ok||]
303
304 -- These are passed by the caller via 'ok' or 'ko'
305 -- , farthestInput =
306 -- , farthestExpecting =
307
308 -- Some callers can call this subroutine
309 -- with zero checkedHorizon, hence use this minimum.
310 -- TODO: maybe it could be improved a bit
311 -- by taking the minimum of the checked horizons
312 -- before all the 'call's and 'jump's to this subroutine.
313 , checkedHorizon = 0
314 , minHorizonByName = minHorizonByNameButSub
315 })
316 ||]
317 let decl = TH.FunD n [TH.Clause [] (TH.NormalB body) []]
318 expr <- TH.unTypeQ (TH.examineCode (unGen k ctx
319 { minHorizonByName =
320 -- 'sub' is 'call'able within 'k'.
321 Map.insert n
322 (minHorizon sub minHorizonByNameButSub)
323 (minHorizonByName ctx)
324 }))
325 return (TH.LetE [decl] expr)
326 }
327 jump (LetName n) = Gen
328 { minHorizon = (Map.! n)
329 , unGen = \ctx -> [||
330 let _ = "jump" in
331 $$(Code (TH.unsafeTExpCoerce (return (TH.VarE n))))
332 {-ok-}$$(retCode ctx)
333 $$(input ctx)
334 $$(failStackHead (failStack ctx))
335 ||]
336 }
337 call (LetName n) k = k
338 { minHorizon = (Map.! n)
339 , unGen = \ctx -> [||
340 let _ = "call" in
341 $$(Code (TH.unsafeTExpCoerce (return (TH.VarE n))))
342 {-ok-}$$(generateSuspend k ctx)
343 $$(input ctx)
344 $$(failStackHead (failStack ctx))
345 ||]
346 }
347 ret = Gen
348 { minHorizon = \_hs -> 0
349 , unGen = \ctx -> unGen (generateResume (retCode ctx)) ctx
350 }
351
352 -- | Generate a continuation to be called with 'generateResume',
353 -- used when 'call' 'ret'urns.
354 -- The return 'v'alue is 'push'ed on the 'valueStack'.
355 generateSuspend ::
356 {-k-}Gen inp (v ': vs) es a ->
357 GenCtx inp vs es a ->
358 CodeQ (Cont inp v a)
359 generateSuspend k ctx = [||
360 let _ = "suspend" in
361 \farInp farExp v !inp ->
362 $$(unGen k ctx
363 { valueStack = ValueStackCons (H.Term [||v||]) (valueStack ctx)
364 , input = [||inp||]
365 , farthestInput = [||farInp||]
366 , farthestExpecting = [||farExp||]
367 , checkedHorizon = 0
368 }
369 )
370 ||]
371
372 -- | Generate a call to the 'generateSuspend' continuation.
373 -- Used when 'call' 'ret'urns.
374 generateResume ::
375 CodeQ (Cont inp v a) ->
376 Gen inp (v ': vs) es a
377 generateResume k = Gen
378 { minHorizon = \_hs -> 0
379 , unGen = \ctx -> [||
380 let _ = "resume" in
381 $$k
382 $$(farthestInput ctx)
383 $$(farthestExpecting ctx)
384 (let _ = "resume.genCode" in $$(genCode (valueStackHead (valueStack ctx))))
385 $$(input ctx)
386 ||]
387 }
388
389 instance Joinable Gen where
390 defJoin (LetName n) joined k = k
391 { minHorizon = minHorizon k
392 , unGen = \ctx -> Code $ TH.unsafeTExpCoerce $ do
393 body <- TH.unTypeQ $ TH.examineCode $ [||
394 \farInp farExp v !inp ->
395 $$(unGen joined ctx
396 { valueStack = ValueStackCons (H.Term [||v||]) (valueStack ctx)
397 , input = [||inp||]
398 , farthestInput = [||farInp||]
399 , farthestExpecting = [||farExp||]
400 , checkedHorizon = 0
401 })
402 ||]
403 let decl = TH.FunD n [TH.Clause [] (TH.NormalB body) []]
404 expr <- TH.unTypeQ (TH.examineCode (unGen k ctx
405 { minHorizonByName =
406 -- 'joined' is 'refJoin'able within 'k'.
407 Map.insert n
408 -- By definition (in 'joinNext')
409 -- 'joined' is not recursively 'refJoin'able within 'joined',
410 -- hence no need to prevent against recursivity
411 -- as has to be done in 'subroutine'.
412 (minHorizon joined (minHorizonByName ctx))
413 (minHorizonByName ctx)
414 }))
415 return (TH.LetE [decl] expr)
416 }
417 refJoin (LetName n) = (generateResume (Code (TH.unsafeTExpCoerce (return (TH.VarE n)))))
418 { minHorizon = (Map.! n)
419 }
420 instance Readable Char Gen where
421 read farExp p = checkHorizon . checkToken farExp p
422
423 checkHorizon ::
424 TH.Lift (InputToken inp) =>
425 {-ok-}Gen inp vs ('Succ es) a ->
426 Gen inp vs ('Succ es) a
427 checkHorizon ok = ok
428 { minHorizon = \hs -> 1 + minHorizon ok hs
429 , unGen = \ctx0@GenCtx{failStack = FailStackCons e es} -> [||
430 -- Factorize failure code
431 let readFail = $$(e) in
432 $$(
433 let ctx = ctx0{ failStack = FailStackCons [||readFail||] es } in
434 if checkedHorizon ctx >= 1
435 then unGen ok ctx0{checkedHorizon = checkedHorizon ctx - 1}
436 else let minHoriz = minHorizon ok (minHorizonByName ctx) in
437 [||
438 if $$(moreInput ctx)
439 $$(if minHoriz > 0
440 then [||$$shiftRight minHoriz $$(input ctx)||]
441 else input ctx)
442 then $$(unGen ok ctx{checkedHorizon = minHoriz})
443 else let _ = "checkHorizon.else" in
444 $$(unGen (fail [ErrorItemHorizon (minHoriz + 1)]) ctx)
445 ||]
446 )
447 ||]
448 }
449
450 checkToken ::
451 forall inp vs es a.
452 Ord (InputToken inp) =>
453 TH.Lift (InputToken inp) =>
454 [ErrorItem (InputToken inp)] ->
455 {-predicate-}TermInstr (InputToken inp -> Bool) ->
456 {-ok-}Gen inp (InputToken inp ': vs) ('Succ es) a ->
457 Gen inp vs ('Succ es) a
458 checkToken farExp p ok = ok
459 { unGen = \ctx -> [||
460 let !(# c, cs #) = $$(nextInput ctx) $$(input ctx) in
461 if $$(genCode p) c
462 then $$(unGen ok ctx
463 { valueStack = ValueStackCons (H.Term [||c||]) (valueStack ctx)
464 , input = [||cs||]
465 })
466 else let _ = "checkToken.else" in $$(unGen (fail farExp) ctx)
467 ||]
468 }
469