]> Git — Sourcephile - haskell/symantic.git/blob - Language/Symantic/Parsing/Grammar.hs
Remove Alternative uses in grammars.
[haskell/symantic.git] / Language / Symantic / Parsing / Grammar.hs
1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE StandaloneDeriving #-}
6 -- | This module defines symantics
7 -- for regular or context-free grammars.
8 --
9 -- The default grammar can be printed in 'EBNF'
10 -- with: @cabal test ebnf --show-details=always@.
11 module Language.Symantic.Parsing.Grammar where
12
13 import Control.Applicative (Applicative(..))
14 import Control.Monad
15 import qualified Data.Bool as Bool
16 import qualified Data.Char as Char
17 import Data.Foldable hiding (any)
18 import Data.Semigroup hiding (option)
19 import Data.String (IsString(..))
20 import Data.Text (Text)
21 import Prelude hiding (any)
22
23 -- * Class 'Gram_Rule'
24 type Id a = a -> a
25 class Gram_Rule g where
26 rule :: Text -> Id (g a)
27 rule _n = id
28 rule1 :: Text -> Id (g a -> g b)
29 rule1 _n g = g
30 rule2 :: Text -> Id (g a -> g b -> g c)
31 rule2 _n g = g
32 rule3 :: Text -> Id (g a -> g b -> g c -> g d)
33 rule3 _n g = g
34 rule4 :: Text -> Id (g a -> g b -> g c -> g d -> g e)
35 rule4 _n g = g
36
37 -- * Type 'Terminal'
38 -- | Terminal grammar.
39 newtype Terminal g a
40 = Terminal { unTerminal :: g a }
41 deriving (Functor, Gram_Terminal)
42 deriving instance Gram_Rule g => Gram_Rule (Terminal g)
43
44 -- ** Class 'Gram_Terminal'
45 -- | Symantics for terminal grammars.
46 class Gram_Terminal g where
47 any :: g Char
48 but :: Terminal g Char -> Terminal g Char -> Terminal g Char
49 eoi :: g ()
50 char :: Char -> g Char
51 string :: String -> g String
52 unicat :: Unicat -> g Char
53 range :: (Char, Char) -> g Char
54 -- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "")
55 -- string [] = pure []
56 -- string (c:cs) = (:) <$> char c <*> string cs
57
58 -- *** Type 'Unicat'
59 -- | Unicode category.
60 data Unicat
61 = Unicat_Letter
62 | Unicat_Mark
63 | Unicat_Number
64 | Unicat_Punctuation
65 | Unicat_Symbol
66 | Unicat Char.GeneralCategory
67 deriving (Eq, Show)
68
69 unicode_categories :: Unicat -> [Char.GeneralCategory]
70 unicode_categories c =
71 case c of
72 Unicat_Letter ->
73 [ Char.UppercaseLetter
74 , Char.LowercaseLetter
75 , Char.TitlecaseLetter
76 , Char.ModifierLetter
77 , Char.OtherLetter
78 ]
79 Unicat_Mark ->
80 [ Char.NonSpacingMark
81 , Char.SpacingCombiningMark
82 , Char.EnclosingMark
83 ]
84 Unicat_Number ->
85 [ Char.DecimalNumber
86 , Char.LetterNumber
87 , Char.OtherNumber
88 ]
89 Unicat_Punctuation ->
90 [ Char.ConnectorPunctuation
91 , Char.DashPunctuation
92 , Char.OpenPunctuation
93 , Char.ClosePunctuation
94 , Char.OtherPunctuation
95 ]
96 Unicat_Symbol ->
97 [ Char.MathSymbol
98 , Char.CurrencySymbol
99 , Char.ModifierSymbol
100 , Char.OtherSymbol
101 ]
102 Unicat cat -> [cat]
103
104 -- * Type 'Reg'
105 -- | Left or right regular grammar.
106 newtype Reg (lr::LR) g a = Reg { unReg :: g a }
107 deriving (IsString, Functor, Gram_Terminal)
108 deriving instance Alter g => Alter (Reg lr g)
109 deriving instance Gram_Rule g => Gram_Rule (Reg lr g)
110 deriving instance (Functor g, Alter g, Gram_RegL g) => Gram_RegL (RegL g)
111 deriving instance (Functor g, Alter g, Gram_RegR g) => Gram_RegR (RegR g)
112
113 reg_of_term :: Terminal g a -> Reg lr g a
114 reg_of_term (Terminal g) = Reg g
115
116 -- ** Type 'LR'
117 data LR
118 = L -- ^ Left
119 | R -- ^ Right
120 deriving (Eq, Show)
121 type RegL = Reg 'L
122 type RegR = Reg 'R
123
124 -- ** Class 'Alter'
125 -- | Like 'Alternative' but without the 'Applicative' super-class,
126 -- because a regular grammar is not closed under 'Applicative'.
127 -- And also because the alternative operator has to backtrack
128 -- when the first alternative fails.
129 class Alter g where
130 empty :: g a
131 (<+>) :: g a -> g a -> g a
132 choice :: [g a] -> g a
133 choice = foldr (<+>) empty
134 deriving instance Alter p => Alter (Terminal p)
135
136 infixl 3 <+>
137
138 -- ** Class 'Gram_RegR'
139 -- | Symantics for right regular grammars.
140 class (Functor g, Alter g) => Gram_RegR g where
141 (.*>) :: Terminal g (a -> b) -> RegR g a -> RegR g b
142 manyR :: Terminal g a -> RegR g [a]
143 manyR g = (:) <$> g .*> manyR g <+> empty
144 someR :: Terminal g a -> RegR g [a]
145 someR g = (:) <$> g .*> manyR g
146 infixl 4 .*>
147
148 -- ** Class 'Gram_RegL'
149 -- | Symantics for left regular grammars.
150 class (Functor g, Alter g) => Gram_RegL g where
151 (<*.) :: RegL g (a -> b) -> Terminal g a -> RegL g b
152 manyL :: Terminal g a -> RegL g [a]
153 manyL g' = reverse <$> go g'
154 where go g = flip (:) <$> go g <*. g <+> empty
155 someL :: Terminal g a -> RegL g [a]
156 someL g = (\cs c -> cs ++ [c]) <$> manyL g <*. g
157 infixl 4 <*.
158
159 -- * Class 'Alt'
160 class (Alter g, Applicative g) => Alt g where
161 option :: a -> g a -> g a
162 option x g = g <+> pure x
163 many :: g a -> g [a]
164 many a = some a <+> pure []
165 some :: g a -> g [a]
166 some a = (:) <$> a <*> many a
167 skipMany :: g a -> g ()
168 skipMany = void . many
169 --manyTill :: g a -> g end -> g [a]
170 --manyTill g end = go where go = ([] <$ end) <|> ((:) <$> g <*> go)
171
172 -- * Class 'App'
173 class Applicative g => App g where
174 between :: g open -> g close -> g a -> g a
175 between open close g = open *> g <* close
176
177 -- * Type 'CF'
178 -- | Context-free grammar.
179 newtype CF g a = CF { unCF :: g a }
180 deriving (IsString, Functor, Gram_Terminal, Applicative, App)
181 deriving instance Alter g => Alter (CF g)
182 deriving instance Alt g => Alt (CF g)
183 deriving instance Gram_Rule g => Gram_Rule (CF g)
184 deriving instance Gram_RegL g => Gram_RegL (CF g)
185 deriving instance Gram_RegR g => Gram_RegR (CF g)
186 deriving instance Gram_CF g => Gram_CF (CF g)
187
188 cf_of_term :: Terminal g a -> CF g a
189 cf_of_term (Terminal g) = CF g
190
191 cf_of_reg :: Reg lr g a -> CF g a
192 cf_of_reg (Reg g) = CF g
193
194 -- ** Class 'Gram_CF'
195 -- | Symantics for context-free grammars.
196 class Gram_CF g where
197 -- | NOTE: CFL ∩ RL is a CFL.
198 -- See ISBN 81-7808-347-7, Theorem 7.27, g.286
199 (<&) :: CF g (a -> b) -> Reg lr g a -> CF g b
200 (&>) :: Reg lr g (a -> b) -> CF g a -> CF g b
201 -- | NOTE: CFL - RL is a CFL.
202 -- See ISBN 81-7808-347-7, Theorem 7.29, g.289
203 minus :: CF g a -> Reg lr g b -> CF g a
204 infixl 4 <&
205 infixl 4 &>
206
207 -- * Class 'Gram_Meta'
208 class Gram_Meta meta g where
209 metaG :: g (meta -> a) -> g a
210 instance Gram_Meta meta g => Gram_Meta meta (CF g) where
211 metaG = CF . metaG . unCF
212
213 -- * Class 'Gram_Lexer'
214 class
215 ( Alt g
216 , Alter g
217 , App g
218 , Gram_CF g
219 , Gram_Rule g
220 , Gram_Terminal g
221 ) => Gram_Lexer g where
222 commentable :: g () -> g () -> g () -> g ()
223 commentable = rule3 "commentable" $ \g line block ->
224 skipMany $ choice [g, line, block]
225 comment_line :: CF g String -> CF g String
226 comment_line prefix = rule "comment_line" $
227 prefix *> many (any `minus` (void (char '\n') <+> eoi))
228 comment_block :: CF g String -> Reg lr g String -> CF g String
229 comment_block start end = rule "comment_block" $
230 start *> many (any `minus` void end)
231 lexeme :: CF g a -> CF g a
232 lexeme = rule1 "lexeme" $ \g ->
233 g <* commentable
234 (void $ char ' ')
235 (void $ comment_line (string "--"))
236 (void $ comment_block (string "{-") (string "-}"))
237 parens :: CF g a -> CF g a
238 parens = rule1 "parens" $
239 between
240 (lexeme $ string "(")
241 (lexeme $ string ")")
242 operators
243 :: CF g a -- ^ expression
244 -> CF g (Unifix, a -> a) -- ^ prefix operator
245 -> CF g (Infix , a -> a -> a) -- ^ infix operator
246 -> CF g (Unifix, a -> a) -- ^ postfix operator
247 -> CF g (Either Error_Fixity a)
248 operators g prG iG poG =
249 (evalOpTree <$>)
250 <$> go g prG iG poG
251 where
252 go
253 :: CF g a
254 -> CF g (Unifix, a -> a)
255 -> CF g (Infix , a -> a -> a)
256 -> CF g (Unifix, a -> a)
257 -> CF g (Either Error_Fixity (OpTree a))
258 go = rule4 "operators" $ \aG preG inG postG ->
259 (\pres a posts ->
260 let nod_a =
261 foldr insert_unifix
262 (foldl' (flip insert_unifix) (OpNode0 a) posts)
263 pres
264 in \case
265 Just (in_, b) -> insert_infix nod_a in_ b
266 Nothing -> Right nod_a)
267 <$> many preG
268 <*> aG
269 <*> many postG
270 <*> option Nothing (curry Just <$> inG <*> go aG preG inG postG)
271
272 insert_unifix :: (Unifix, a -> a) -> OpTree a -> OpTree a
273 insert_unifix a@(uni_a@(Prefix prece_a), op_a) nod_b =
274 case nod_b of
275 OpNode0{} -> OpNode1 uni_a op_a nod_b
276 OpNode1 Prefix{} _op_b _nod -> OpNode1 uni_a op_a nod_b
277 OpNode1 uni_b@(Postfix prece_b) op_b nod ->
278 case prece_b `compare` prece_a of
279 GT -> OpNode1 uni_a op_a nod_b
280 EQ -> OpNode1 uni_a op_a nod_b
281 LT -> OpNode1 uni_b op_b $ insert_unifix a nod
282 OpNode2 inf_b op_b l r ->
283 case infix_prece inf_b `compare` prece_a of
284 GT -> OpNode1 uni_a op_a nod_b
285 EQ -> OpNode1 uni_a op_a nod_b
286 LT -> OpNode2 inf_b op_b (insert_unifix a l) r
287 insert_unifix a@(uni_a@(Postfix prece_a), op_a) nod_b =
288 case nod_b of
289 OpNode0{} -> OpNode1 uni_a op_a nod_b
290 OpNode1 uni_b@(Prefix prece_b) op_b nod ->
291 case prece_b `compare` prece_a of
292 GT -> OpNode1 uni_a op_a nod_b
293 EQ -> OpNode1 uni_a op_a nod_b
294 LT -> OpNode1 uni_b op_b $ insert_unifix a nod
295 OpNode1 Postfix{} _op_b _nod -> OpNode1 uni_a op_a nod_b
296 OpNode2 inf_b op_b l r ->
297 case infix_prece inf_b `compare` prece_a of
298 GT -> OpNode1 uni_a op_a nod_b
299 EQ -> OpNode1 uni_a op_a nod_b
300 LT -> OpNode2 inf_b op_b l (insert_unifix a r)
301
302 insert_infix
303 :: OpTree a
304 -> (Infix, a -> a -> a)
305 -> Either Error_Fixity (OpTree a)
306 -> Either Error_Fixity (OpTree a)
307 insert_infix nod_a in_@(inf_a, op_a) e_nod_b = do
308 nod_b <- e_nod_b
309 case nod_b of
310 OpNode0{} -> Right $ OpNode2 inf_a op_a nod_a nod_b
311 OpNode1 uni_b op_b nod ->
312 case unifix_prece uni_b `compare` infix_prece inf_a of
313 EQ -> Right $ OpNode2 inf_a op_a nod_a nod_b
314 GT -> Right $ OpNode2 inf_a op_a nod_a nod_b
315 LT -> do
316 n <- insert_infix nod_a in_ (Right nod)
317 Right $ OpNode1 uni_b op_b n
318 OpNode2 inf_b op_b l r ->
319 case infix_prece inf_b `compare` infix_prece inf_a of
320 GT -> Right $ OpNode2 inf_a op_a nod_a nod_b
321 LT -> do
322 n <- insert_infix nod_a in_ (Right l)
323 Right $ OpNode2 inf_b op_b n r
324 EQ ->
325 let ass = \case
326 AssocL -> L
327 AssocR -> R
328 AssocB lr -> lr in
329 case (ass <$> infix_assoc inf_b, ass <$> infix_assoc inf_a) of
330 (Just L, Just L) -> do
331 n <- insert_infix nod_a in_ (Right l)
332 Right $ OpNode2 inf_b op_b n r
333 (Just R, Just R) ->
334 Right $ OpNode2 inf_a op_a nod_a nod_b
335 _ -> Left $ Error_Fixity_Infix_not_combinable inf_a inf_b
336 -- NOTE: non-associating infix ops
337 -- of the same precedence cannot be mixed.
338 infixrG :: CF g a -> CF g (a -> a -> a) -> CF g a
339 infixrG = rule2 "infixr" $ \g opG ->
340 (\a -> \case
341 Just (op, b) -> a `op` b
342 Nothing -> a)
343 <$> g
344 <*> option Nothing (curry Just <$> opG <*> infixrG g opG)
345 infixlG :: CF g a -> CF g (a -> a -> a) -> CF g a
346 infixlG = rule2 "infixl" $ \g opG ->
347 -- NOTE: infixl uses the same grammar than infixr,
348 -- but build the parsed value by applying
349 -- the operator in the opposite way.
350 ($ id) <$> go g opG
351 where
352 go :: CF g a -> CF g (a -> a -> a) -> CF g ((a -> a) -> a)
353 go g opG =
354 (\a -> \case
355 Just (op, kb) -> \k -> kb (k a `op`)
356 Nothing -> ($ a))
357 <$> g
358 <*> option Nothing (curry Just <$> opG <*> go g opG)
359 inside :: (a -> b) -> CF g begin -> CF g a -> CF g end -> CF g b -> CF g b
360 inside f = rule4 "inside" $ \begin i end n ->
361 (f <$ begin <*> i <* end) <+> n
362 symbol :: String -> CF g String
363 symbol = lexeme . string
364
365 deriving instance Gram_Lexer g => Gram_Lexer (CF g)
366
367 -- ** Type 'Error_Fixity'
368 data Error_Fixity
369 = Error_Fixity_Infix_not_combinable Infix Infix
370 | Error_Fixity_NeedPostfixOrInfix
371 | Error_Fixity_NeedPrefix
372 | Error_Fixity_NeedPostfix
373 | Error_Fixity_NeedInfix
374 deriving (Eq, Show)
375
376 -- ** Type 'NeedFixity'
377 data NeedFixity
378 = NeedPrefix
379 | NeedPostfix
380 | NeedPostfixOrInfix
381 deriving (Eq, Ord, Show)
382
383 -- ** Type 'Fixity'
384 data Fixity a
385 = FixityPrefix Unifix (a -> a)
386 | FixityPostfix Unifix (a -> a)
387 | FixityInfix Infix (a -> a -> a)
388
389 -- ** Type 'Precedence'
390 type Precedence = Int
391
392 -- ** Type 'Associativity'
393 -- type Associativity = LR
394 data Associativity
395 = AssocL -- ^ Associate to the left: @a ¹ b ² c == (a ¹ b) ² c@
396 | AssocR -- ^ Associate to the right: @a ¹ b ² c == a ¹ (b ² c)@
397 | AssocB LR -- ^ Associate to both side, but to 'LR' when reading.
398 deriving (Eq, Show)
399
400 -- ** Type 'Unifix'
401 data Unifix
402 = Prefix { unifix_prece :: Precedence }
403 | Postfix { unifix_prece :: Precedence }
404 deriving (Eq, Show)
405
406 -- ** Type 'Infix'
407 data Infix
408 = Infix
409 { infix_assoc :: Maybe Associativity
410 , infix_prece :: Precedence
411 } deriving (Eq, Show)
412
413 infixL :: Precedence -> Infix
414 infixL = Infix (Just AssocL)
415
416 infixR :: Precedence -> Infix
417 infixR = Infix (Just AssocR)
418
419 infixB :: LR -> Precedence -> Infix
420 infixB = Infix . Just . AssocB
421
422 infixN :: Precedence -> Infix
423 infixN = Infix Nothing
424
425 infixN0 :: Infix
426 infixN0 = infixN 0
427
428 infixN5 :: Infix
429 infixN5 = infixN 5
430
431 infix_paren
432 :: (Semigroup s, IsString s)
433 => (Infix, LR) -> Infix -> s -> s
434 infix_paren (po, lr) op s =
435 if infix_prece op < infix_prece po
436 || infix_prece op == infix_prece po
437 && Bool.not associate
438 then fromString "(" <> s <> fromString ")"
439 else s
440 where
441 associate =
442 case (lr, infix_assoc po) of
443 (_, Just AssocB{}) -> True
444 (L, Just AssocL) -> True
445 (R, Just AssocR) -> True
446 _ -> False
447
448 -- ** Type 'OpTree'
449 data OpTree a
450 = OpNode0 a
451 | OpNode1 Unifix (a -> a) (OpTree a)
452 | OpNode2 Infix (a -> a -> a) (OpTree a) (OpTree a)
453
454 -- | Collapse an 'OpTree'.
455 evalOpTree :: OpTree a -> a
456 evalOpTree (OpNode0 a) = a
457 evalOpTree (OpNode1 _uni op n) = op $ evalOpTree n
458 evalOpTree (OpNode2 _inf op l r) = evalOpTree l `op` evalOpTree r