]> Git — Sourcephile - haskell/symantic.git/blob - Language/Symantic/Parsing/Grammar.hs
Add optional.
[haskell/symantic.git] / Language / Symantic / Parsing / Grammar.hs
1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE StandaloneDeriving #-}
6 -- | This module defines symantics
7 -- for regular or context-free grammars.
8 --
9 -- The default grammar can be printed in 'EBNF'
10 -- with: @cabal test ebnf --show-details=always@.
11 module Language.Symantic.Parsing.Grammar where
12
13 import Control.Applicative (Applicative(..))
14 import Control.Monad
15 import qualified Data.Bool as Bool
16 import qualified Data.Char as Char
17 import Data.Foldable hiding (any)
18 import Data.Semigroup hiding (option)
19 import Data.String (IsString(..))
20 import Data.Text (Text)
21 import Prelude hiding (any)
22
23 -- * Class 'Gram_Rule'
24 type Id a = a -> a
25 class Gram_Rule g where
26 rule :: Text -> Id (g a)
27 rule _n = id
28 rule1 :: Text -> Id (g a -> g b)
29 rule1 _n g = g
30 rule2 :: Text -> Id (g a -> g b -> g c)
31 rule2 _n g = g
32 rule3 :: Text -> Id (g a -> g b -> g c -> g d)
33 rule3 _n g = g
34 rule4 :: Text -> Id (g a -> g b -> g c -> g d -> g e)
35 rule4 _n g = g
36
37 -- * Type 'Terminal'
38 -- | Terminal grammar.
39 newtype Terminal g a
40 = Terminal { unTerminal :: g a }
41 deriving (Functor, Gram_Terminal)
42 deriving instance Gram_Rule g => Gram_Rule (Terminal g)
43
44 -- ** Class 'Gram_Terminal'
45 -- | Symantics for terminal grammars.
46 class Gram_Terminal g where
47 any :: g Char
48 but :: Terminal g Char -> Terminal g Char -> Terminal g Char
49 eoi :: g ()
50 char :: Char -> g Char
51 string :: String -> g String
52 unicat :: Unicat -> g Char
53 range :: (Char, Char) -> g Char
54 -- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "")
55 -- string [] = pure []
56 -- string (c:cs) = (:) <$> char c <*> string cs
57
58 -- *** Type 'Unicat'
59 -- | Unicode category.
60 data Unicat
61 = Unicat_Letter
62 | Unicat_Mark
63 | Unicat_Number
64 | Unicat_Punctuation
65 | Unicat_Symbol
66 | Unicat Char.GeneralCategory
67 deriving (Eq, Show)
68
69 unicode_categories :: Unicat -> [Char.GeneralCategory]
70 unicode_categories c =
71 case c of
72 Unicat_Letter ->
73 [ Char.UppercaseLetter
74 , Char.LowercaseLetter
75 , Char.TitlecaseLetter
76 , Char.ModifierLetter
77 , Char.OtherLetter
78 ]
79 Unicat_Mark ->
80 [ Char.NonSpacingMark
81 , Char.SpacingCombiningMark
82 , Char.EnclosingMark
83 ]
84 Unicat_Number ->
85 [ Char.DecimalNumber
86 , Char.LetterNumber
87 , Char.OtherNumber
88 ]
89 Unicat_Punctuation ->
90 [ Char.ConnectorPunctuation
91 , Char.DashPunctuation
92 , Char.OpenPunctuation
93 , Char.ClosePunctuation
94 , Char.OtherPunctuation
95 ]
96 Unicat_Symbol ->
97 [ Char.MathSymbol
98 , Char.CurrencySymbol
99 , Char.ModifierSymbol
100 , Char.OtherSymbol
101 ]
102 Unicat cat -> [cat]
103
104 -- * Type 'Reg'
105 -- | Left or right regular grammar.
106 newtype Reg (lr::LR) g a = Reg { unReg :: g a }
107 deriving (IsString, Functor, Gram_Terminal)
108 deriving instance Alter g => Alter (Reg lr g)
109 deriving instance Gram_Rule g => Gram_Rule (Reg lr g)
110 deriving instance (Functor g, Alter g, Gram_RegL g) => Gram_RegL (RegL g)
111 deriving instance (Functor g, Alter g, Gram_RegR g) => Gram_RegR (RegR g)
112
113 reg_of_term :: Terminal g a -> Reg lr g a
114 reg_of_term (Terminal g) = Reg g
115
116 -- ** Type 'LR'
117 data LR
118 = L -- ^ Left
119 | R -- ^ Right
120 deriving (Eq, Show)
121 type RegL = Reg 'L
122 type RegR = Reg 'R
123
124 -- ** Class 'Alter'
125 -- | Like 'Alternative' but without the 'Applicative' super-class,
126 -- because a regular grammar is not closed under 'Applicative'.
127 -- And also because the alternative operator has to backtrack
128 -- when the first alternative fails.
129 class Alter g where
130 empty :: g a
131 (<+>) :: g a -> g a -> g a
132 choice :: [g a] -> g a
133 choice = foldr (<+>) empty
134 deriving instance Alter p => Alter (Terminal p)
135
136 infixl 3 <+>
137
138 -- ** Class 'Gram_RegR'
139 -- | Symantics for right regular grammars.
140 class (Functor g, Alter g) => Gram_RegR g where
141 (.*>) :: Terminal g (a -> b) -> RegR g a -> RegR g b
142 manyR :: Terminal g a -> RegR g [a]
143 manyR g = (:) <$> g .*> manyR g <+> empty
144 someR :: Terminal g a -> RegR g [a]
145 someR g = (:) <$> g .*> manyR g
146 infixl 4 .*>
147
148 -- ** Class 'Gram_RegL'
149 -- | Symantics for left regular grammars.
150 class (Functor g, Alter g) => Gram_RegL g where
151 (<*.) :: RegL g (a -> b) -> Terminal g a -> RegL g b
152 manyL :: Terminal g a -> RegL g [a]
153 manyL g' = reverse <$> go g'
154 where go g = flip (:) <$> go g <*. g <+> empty
155 someL :: Terminal g a -> RegL g [a]
156 someL g = (\cs c -> cs ++ [c]) <$> manyL g <*. g
157 infixl 4 <*.
158
159 -- * Class 'Alt'
160 class (Alter g, Applicative g) => Alt g where
161 option :: a -> g a -> g a
162 option x g = g <+> pure x
163 optional :: g a -> g (Maybe a)
164 optional v = Just <$> v <+> pure Nothing
165 many :: g a -> g [a]
166 many a = some a <+> pure []
167 some :: g a -> g [a]
168 some a = (:) <$> a <*> many a
169 skipMany :: g a -> g ()
170 skipMany = void . many
171 --manyTill :: g a -> g end -> g [a]
172 --manyTill g end = go where go = ([] <$ end) <|> ((:) <$> g <*> go)
173
174 -- * Class 'App'
175 class Applicative g => App g where
176 between :: g open -> g close -> g a -> g a
177 between open close g = open *> g <* close
178
179 -- * Type 'CF'
180 -- | Context-free grammar.
181 newtype CF g a = CF { unCF :: g a }
182 deriving (IsString, Functor, Gram_Terminal, Applicative, App)
183 deriving instance Alter g => Alter (CF g)
184 deriving instance Alt g => Alt (CF g)
185 deriving instance Gram_Rule g => Gram_Rule (CF g)
186 deriving instance Gram_RegL g => Gram_RegL (CF g)
187 deriving instance Gram_RegR g => Gram_RegR (CF g)
188 deriving instance Gram_CF g => Gram_CF (CF g)
189
190 cf_of_term :: Terminal g a -> CF g a
191 cf_of_term (Terminal g) = CF g
192
193 cf_of_reg :: Reg lr g a -> CF g a
194 cf_of_reg (Reg g) = CF g
195
196 -- ** Class 'Gram_CF'
197 -- | Symantics for context-free grammars.
198 class Gram_CF g where
199 -- | NOTE: CFL ∩ RL is a CFL.
200 -- See ISBN 81-7808-347-7, Theorem 7.27, g.286
201 (<&) :: CF g (a -> b) -> Reg lr g a -> CF g b
202 (&>) :: Reg lr g (a -> b) -> CF g a -> CF g b
203 -- | NOTE: CFL - RL is a CFL.
204 -- See ISBN 81-7808-347-7, Theorem 7.29, g.289
205 minus :: CF g a -> Reg lr g b -> CF g a
206 infixl 4 <&
207 infixl 4 &>
208
209 -- * Class 'Gram_Meta'
210 class Gram_Meta meta g where
211 metaG :: g (meta -> a) -> g a
212 instance Gram_Meta meta g => Gram_Meta meta (CF g) where
213 metaG = CF . metaG . unCF
214
215 -- * Class 'Gram_Lexer'
216 class
217 ( Alt g
218 , Alter g
219 , App g
220 , Gram_CF g
221 , Gram_Rule g
222 , Gram_Terminal g
223 ) => Gram_Lexer g where
224 commentable :: g () -> g () -> g () -> g ()
225 commentable = rule3 "commentable" $ \g line block ->
226 skipMany $ choice [g, line, block]
227 comment_line :: CF g String -> CF g String
228 comment_line prefix = rule "comment_line" $
229 prefix *> many (any `minus` (void (char '\n') <+> eoi))
230 comment_block :: CF g String -> Reg lr g String -> CF g String
231 comment_block start end = rule "comment_block" $
232 start *> many (any `minus` void end)
233 lexeme :: CF g a -> CF g a
234 lexeme = rule1 "lexeme" $ \g ->
235 g <* commentable
236 (void $ char ' ')
237 (void $ comment_line (string "--"))
238 (void $ comment_block (string "{-") (string "-}"))
239 parens :: CF g a -> CF g a
240 parens = rule1 "parens" $
241 between
242 (lexeme $ string "(")
243 (lexeme $ string ")")
244 operators
245 :: CF g a -- ^ expression
246 -> CF g (Unifix, a -> a) -- ^ prefix operator
247 -> CF g (Infix , a -> a -> a) -- ^ infix operator
248 -> CF g (Unifix, a -> a) -- ^ postfix operator
249 -> CF g (Either Error_Fixity a)
250 operators g prG iG poG =
251 (evalOpTree <$>)
252 <$> go g prG iG poG
253 where
254 go
255 :: CF g a
256 -> CF g (Unifix, a -> a)
257 -> CF g (Infix , a -> a -> a)
258 -> CF g (Unifix, a -> a)
259 -> CF g (Either Error_Fixity (OpTree a))
260 go = rule4 "operators" $ \aG preG inG postG ->
261 (\pres a posts ->
262 let nod_a =
263 foldr insert_unifix
264 (foldl' (flip insert_unifix) (OpNode0 a) posts)
265 pres
266 in \case
267 Just (in_, b) -> insert_infix nod_a in_ b
268 Nothing -> Right nod_a)
269 <$> many preG
270 <*> aG
271 <*> many postG
272 <*> option Nothing (curry Just <$> inG <*> go aG preG inG postG)
273
274 insert_unifix :: (Unifix, a -> a) -> OpTree a -> OpTree a
275 insert_unifix a@(uni_a@(Prefix prece_a), op_a) nod_b =
276 case nod_b of
277 OpNode0{} -> OpNode1 uni_a op_a nod_b
278 OpNode1 Prefix{} _op_b _nod -> OpNode1 uni_a op_a nod_b
279 OpNode1 uni_b@(Postfix prece_b) op_b nod ->
280 case prece_b `compare` prece_a of
281 GT -> OpNode1 uni_a op_a nod_b
282 EQ -> OpNode1 uni_a op_a nod_b
283 LT -> OpNode1 uni_b op_b $ insert_unifix a nod
284 OpNode2 inf_b op_b l r ->
285 case infix_prece inf_b `compare` prece_a of
286 GT -> OpNode1 uni_a op_a nod_b
287 EQ -> OpNode1 uni_a op_a nod_b
288 LT -> OpNode2 inf_b op_b (insert_unifix a l) r
289 insert_unifix a@(uni_a@(Postfix prece_a), op_a) nod_b =
290 case nod_b of
291 OpNode0{} -> OpNode1 uni_a op_a nod_b
292 OpNode1 uni_b@(Prefix prece_b) op_b nod ->
293 case prece_b `compare` prece_a of
294 GT -> OpNode1 uni_a op_a nod_b
295 EQ -> OpNode1 uni_a op_a nod_b
296 LT -> OpNode1 uni_b op_b $ insert_unifix a nod
297 OpNode1 Postfix{} _op_b _nod -> OpNode1 uni_a op_a nod_b
298 OpNode2 inf_b op_b l r ->
299 case infix_prece inf_b `compare` prece_a of
300 GT -> OpNode1 uni_a op_a nod_b
301 EQ -> OpNode1 uni_a op_a nod_b
302 LT -> OpNode2 inf_b op_b l (insert_unifix a r)
303
304 insert_infix
305 :: OpTree a
306 -> (Infix, a -> a -> a)
307 -> Either Error_Fixity (OpTree a)
308 -> Either Error_Fixity (OpTree a)
309 insert_infix nod_a in_@(inf_a, op_a) e_nod_b = do
310 nod_b <- e_nod_b
311 case nod_b of
312 OpNode0{} -> Right $ OpNode2 inf_a op_a nod_a nod_b
313 OpNode1 uni_b op_b nod ->
314 case unifix_prece uni_b `compare` infix_prece inf_a of
315 EQ -> Right $ OpNode2 inf_a op_a nod_a nod_b
316 GT -> Right $ OpNode2 inf_a op_a nod_a nod_b
317 LT -> do
318 n <- insert_infix nod_a in_ (Right nod)
319 Right $ OpNode1 uni_b op_b n
320 OpNode2 inf_b op_b l r ->
321 case infix_prece inf_b `compare` infix_prece inf_a of
322 GT -> Right $ OpNode2 inf_a op_a nod_a nod_b
323 LT -> do
324 n <- insert_infix nod_a in_ (Right l)
325 Right $ OpNode2 inf_b op_b n r
326 EQ ->
327 let ass = \case
328 AssocL -> L
329 AssocR -> R
330 AssocB lr -> lr in
331 case (ass <$> infix_assoc inf_b, ass <$> infix_assoc inf_a) of
332 (Just L, Just L) -> do
333 n <- insert_infix nod_a in_ (Right l)
334 Right $ OpNode2 inf_b op_b n r
335 (Just R, Just R) ->
336 Right $ OpNode2 inf_a op_a nod_a nod_b
337 _ -> Left $ Error_Fixity_Infix_not_combinable inf_a inf_b
338 -- NOTE: non-associating infix ops
339 -- of the same precedence cannot be mixed.
340 infixrG :: CF g a -> CF g (a -> a -> a) -> CF g a
341 infixrG = rule2 "infixr" $ \g opG ->
342 (\a -> \case
343 Just (op, b) -> a `op` b
344 Nothing -> a)
345 <$> g
346 <*> option Nothing (curry Just <$> opG <*> infixrG g opG)
347 infixlG :: CF g a -> CF g (a -> a -> a) -> CF g a
348 infixlG = rule2 "infixl" $ \g opG ->
349 -- NOTE: infixl uses the same grammar than infixr,
350 -- but build the parsed value by applying
351 -- the operator in the opposite way.
352 ($ id) <$> go g opG
353 where
354 go :: CF g a -> CF g (a -> a -> a) -> CF g ((a -> a) -> a)
355 go g opG =
356 (\a -> \case
357 Just (op, kb) -> \k -> kb (k a `op`)
358 Nothing -> ($ a))
359 <$> g
360 <*> option Nothing (curry Just <$> opG <*> go g opG)
361 inside :: (a -> b) -> CF g begin -> CF g a -> CF g end -> CF g b -> CF g b
362 inside f = rule4 "inside" $ \begin i end n ->
363 (f <$ begin <*> i <* end) <+> n
364 symbol :: String -> CF g String
365 symbol = lexeme . string
366
367 deriving instance Gram_Lexer g => Gram_Lexer (CF g)
368
369 -- ** Type 'Error_Fixity'
370 data Error_Fixity
371 = Error_Fixity_Infix_not_combinable Infix Infix
372 | Error_Fixity_NeedPostfixOrInfix
373 | Error_Fixity_NeedPrefix
374 | Error_Fixity_NeedPostfix
375 | Error_Fixity_NeedInfix
376 deriving (Eq, Show)
377
378 -- ** Type 'NeedFixity'
379 data NeedFixity
380 = NeedPrefix
381 | NeedPostfix
382 | NeedPostfixOrInfix
383 deriving (Eq, Ord, Show)
384
385 -- ** Type 'Fixity'
386 data Fixity a
387 = FixityPrefix Unifix (a -> a)
388 | FixityPostfix Unifix (a -> a)
389 | FixityInfix Infix (a -> a -> a)
390
391 -- ** Type 'Precedence'
392 type Precedence = Int
393
394 -- ** Type 'Associativity'
395 -- type Associativity = LR
396 data Associativity
397 = AssocL -- ^ Associate to the left: @a ¹ b ² c == (a ¹ b) ² c@
398 | AssocR -- ^ Associate to the right: @a ¹ b ² c == a ¹ (b ² c)@
399 | AssocB LR -- ^ Associate to both side, but to 'LR' when reading.
400 deriving (Eq, Show)
401
402 -- ** Type 'Unifix'
403 data Unifix
404 = Prefix { unifix_prece :: Precedence }
405 | Postfix { unifix_prece :: Precedence }
406 deriving (Eq, Show)
407
408 -- ** Type 'Infix'
409 data Infix
410 = Infix
411 { infix_assoc :: Maybe Associativity
412 , infix_prece :: Precedence
413 } deriving (Eq, Show)
414
415 infixL :: Precedence -> Infix
416 infixL = Infix (Just AssocL)
417
418 infixR :: Precedence -> Infix
419 infixR = Infix (Just AssocR)
420
421 infixB :: LR -> Precedence -> Infix
422 infixB = Infix . Just . AssocB
423
424 infixN :: Precedence -> Infix
425 infixN = Infix Nothing
426
427 infixN0 :: Infix
428 infixN0 = infixN 0
429
430 infixN5 :: Infix
431 infixN5 = infixN 5
432
433 infix_paren
434 :: (Semigroup s, IsString s)
435 => (Infix, LR) -> Infix -> s -> s
436 infix_paren (po, lr) op s =
437 if infix_prece op < infix_prece po
438 || infix_prece op == infix_prece po
439 && Bool.not associate
440 then fromString "(" <> s <> fromString ")"
441 else s
442 where
443 associate =
444 case (lr, infix_assoc po) of
445 (_, Just AssocB{}) -> True
446 (L, Just AssocL) -> True
447 (R, Just AssocR) -> True
448 _ -> False
449
450 -- ** Type 'OpTree'
451 data OpTree a
452 = OpNode0 a
453 | OpNode1 Unifix (a -> a) (OpTree a)
454 | OpNode2 Infix (a -> a -> a) (OpTree a) (OpTree a)
455
456 -- | Collapse an 'OpTree'.
457 evalOpTree :: OpTree a -> a
458 evalOpTree (OpNode0 a) = a
459 evalOpTree (OpNode1 _uni op n) = op $ evalOpTree n
460 evalOpTree (OpNode2 _inf op l r) = evalOpTree l `op` evalOpTree r