]> Git — Sourcephile - haskell/symantic.git/blob - Language/Symantic/Parsing/Grammar.hs
Fix Lib.Text.
[haskell/symantic.git] / Language / Symantic / Parsing / Grammar.hs
1 {-# LANGUAGE ConstraintKinds #-}
2 {-# LANGUAGE DeriveFunctor #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5 {-# LANGUAGE StandaloneDeriving #-}
6 -- | This module defines symantics
7 -- for regular or context-free grammars.
8 --
9 -- The default grammar can be printed in 'EBNF'
10 -- with: @cabal test ebnf --show-details=always@.
11 module Language.Symantic.Parsing.Grammar where
12
13 import Control.Applicative (Applicative(..), Alternative(..))
14 import Control.Monad
15 import qualified Data.Bool as Bool
16 import qualified Data.Char as Char
17 import Data.Foldable hiding (any)
18 import Data.Semigroup hiding (option)
19 import Data.String (IsString(..))
20 import Data.Text (Text)
21 import Prelude hiding (any)
22
23 -- * Class 'Gram_Rule'
24 type Id a = a -> a
25 class Gram_Rule g where
26 rule :: Text -> Id (g a)
27 rule _n = id
28 rule1 :: Text -> Id (g a -> g b)
29 rule1 _n g = g
30 rule2 :: Text -> Id (g a -> g b -> g c)
31 rule2 _n g = g
32 rule3 :: Text -> Id (g a -> g b -> g c -> g d)
33 rule3 _n g = g
34 rule4 :: Text -> Id (g a -> g b -> g c -> g d -> g e)
35 rule4 _n g = g
36
37 -- * Type 'Terminal'
38 -- | Terminal grammar.
39 newtype Terminal g a
40 = Terminal { unTerminal :: g a }
41 deriving (Functor, Gram_Terminal)
42 deriving instance Gram_Rule g => Gram_Rule (Terminal g)
43
44 -- ** Class 'Gram_Terminal'
45 -- | Symantics for terminal grammars.
46 class Gram_Terminal g where
47 any :: g Char
48 but :: Terminal g Char -> Terminal g Char -> Terminal g Char
49 eoi :: g ()
50 char :: Char -> g Char
51 string :: String -> g String
52 unicat :: Unicat -> g Char
53 range :: (Char, Char) -> g Char
54 -- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "")
55 -- string [] = pure []
56 -- string (c:cs) = (:) <$> char c <*> string cs
57
58 -- *** Type 'Unicat'
59 -- | Unicode category.
60 data Unicat
61 = Unicat_Letter
62 | Unicat_Mark
63 | Unicat_Number
64 | Unicat_Punctuation
65 | Unicat_Symbol
66 | Unicat Char.GeneralCategory
67 deriving (Eq, Show)
68
69 unicode_categories :: Unicat -> [Char.GeneralCategory]
70 unicode_categories c =
71 case c of
72 Unicat_Letter ->
73 [ Char.UppercaseLetter
74 , Char.LowercaseLetter
75 , Char.TitlecaseLetter
76 , Char.ModifierLetter
77 , Char.OtherLetter
78 ]
79 Unicat_Mark ->
80 [ Char.NonSpacingMark
81 , Char.SpacingCombiningMark
82 , Char.EnclosingMark
83 ]
84 Unicat_Number ->
85 [ Char.DecimalNumber
86 , Char.LetterNumber
87 , Char.OtherNumber
88 ]
89 Unicat_Punctuation ->
90 [ Char.ConnectorPunctuation
91 , Char.DashPunctuation
92 , Char.OpenPunctuation
93 , Char.ClosePunctuation
94 , Char.OtherPunctuation
95 ]
96 Unicat_Symbol ->
97 [ Char.MathSymbol
98 , Char.CurrencySymbol
99 , Char.ModifierSymbol
100 , Char.OtherSymbol
101 ]
102 Unicat cat -> [cat]
103
104 -- * Type 'Reg'
105 -- | Left or right regular grammar.
106 newtype Reg (lr::LR) g a = Reg { unReg :: g a }
107 deriving (IsString, Functor, Gram_Terminal, Alter)
108 deriving instance Gram_Rule g => Gram_Rule (Reg lr g)
109 deriving instance (Functor g, Alter g, Gram_RegL g) => Gram_RegL (RegL g)
110 deriving instance (Functor g, Alter g, Gram_RegR g) => Gram_RegR (RegR g)
111
112 reg_of_term :: Terminal g a -> Reg lr g a
113 reg_of_term (Terminal g) = Reg g
114
115 -- ** Type 'LR'
116 data LR
117 = L -- ^ Left
118 | R -- ^ Right
119 deriving (Eq, Show)
120 type RegL = Reg 'L
121 type RegR = Reg 'R
122
123 -- ** Class 'Alter'
124 -- | Like 'Alternative' but without the 'Applicative' super-class,
125 -- because a regular grammar is not closed under 'Applicative'.
126 class Alter g where
127 nil :: g a
128 (<+>) :: g a -> g a -> g a
129 choice :: [g a] -> g a
130 star :: g a -> g [a]
131 default nil :: Alternative g => g a
132 default (<+>) :: Alternative g => g a -> g a -> g a
133 default choice :: Alternative g => [g a] -> g a
134 default star :: Alternative g => g a -> g [a]
135 nil = empty
136 (<+>) = (<|>)
137 choice = foldr (<+>) empty
138 -- star g = (:) <$> g *> star g <+> nil
139
140 star a = many_a
141 where
142 many_a = some_a <+> pure []
143 some_a = ((:) <$> a) <*> many_a
144
145 infixl 3 <+>
146 deriving instance Alter p => Alter (Terminal p)
147
148 -- ** Class 'Gram_RegR'
149 -- | Symantics for right regular grammars.
150 class (Functor g, Alter g) => Gram_RegR g where
151 (.*>) :: Terminal g (a -> b) -> RegR g a -> RegR g b
152 manyR :: Terminal g a -> RegR g [a]
153 manyR g = (:) <$> g .*> manyR g <+> nil
154 someR :: Terminal g a -> RegR g [a]
155 someR g = (:) <$> g .*> manyR g
156 infixl 4 .*>
157
158 -- ** Class 'Gram_RegL'
159 -- | Symantics for left regular grammars.
160 class (Functor g, Alter g) => Gram_RegL g where
161 (<*.) :: RegL g (a -> b) -> Terminal g a -> RegL g b
162 manyL :: Terminal g a -> RegL g [a]
163 manyL g' = reverse <$> go g'
164 where go g = flip (:) <$> go g <*. g <+> nil
165 someL :: Terminal g a -> RegL g [a]
166 someL g = (\cs c -> cs ++ [c]) <$> manyL g <*. g
167 infixl 4 <*.
168
169 -- * Class 'Alt'
170 class (Alternative g, Alter g) => Alt g where
171 option :: a -> g a -> g a
172 option x g = g <+> pure x
173 skipMany :: g a -> g ()
174 skipMany = void . many
175 --manyTill :: g a -> g end -> g [a]
176 --manyTill g end = go where go = ([] <$ end) <|> ((:) <$> g <*> go)
177
178 -- * Class 'App'
179 class Applicative g => App g where
180 between :: g open -> g close -> g a -> g a
181 between open close g = open *> g <* close
182
183 -- * Type 'CF'
184 -- | Context-free grammar.
185 newtype CF g a = CF { unCF :: g a }
186 deriving (IsString, Functor, Gram_Terminal, Applicative, App, Alternative, Alter, Alt)
187 deriving instance Gram_Rule g => Gram_Rule (CF g)
188 deriving instance Gram_RegL g => Gram_RegL (CF g)
189 deriving instance Gram_RegR g => Gram_RegR (CF g)
190 deriving instance Gram_CF g => Gram_CF (CF g)
191
192 cf_of_term :: Terminal g a -> CF g a
193 cf_of_term (Terminal g) = CF g
194
195 cf_of_reg :: Reg lr g a -> CF g a
196 cf_of_reg (Reg g) = CF g
197
198 -- ** Class 'Gram_CF'
199 -- | Symantics for context-free grammars.
200 class Gram_CF g where
201 -- | NOTE: CFL ∩ RL is a CFL.
202 -- See ISBN 81-7808-347-7, Theorem 7.27, g.286
203 (<&) :: CF g (a -> b) -> Reg lr g a -> CF g b
204 (&>) :: Reg lr g (a -> b) -> CF g a -> CF g b
205 -- | NOTE: CFL - RL is a CFL.
206 -- See ISBN 81-7808-347-7, Theorem 7.29, g.289
207 minus :: CF g a -> Reg lr g b -> CF g a
208 infixl 4 <&
209 infixl 4 &>
210
211 -- * Class 'Gram_Meta'
212 class Gram_Meta meta g where
213 metaG :: g (meta -> a) -> g a
214 instance Gram_Meta meta g => Gram_Meta meta (CF g) where
215 metaG = CF . metaG . unCF
216
217 -- * Class 'Gram_Lexer'
218 class
219 ( Alt g
220 , Alter g
221 , Alternative g
222 , App g
223 , Gram_CF g
224 , Gram_Rule g
225 , Gram_Terminal g
226 ) => Gram_Lexer g where
227 commentable :: g () -> g () -> g () -> g ()
228 commentable = rule3 "commentable" $ \g line block ->
229 skipMany $ choice [g, line, block]
230 comment_line :: CF g String -> CF g String
231 comment_line prefix = rule "comment_line" $
232 prefix *> many (any `minus` (void (char '\n') <+> eoi))
233 comment_block :: CF g String -> Reg lr g String -> CF g String
234 comment_block start end = rule "comment_block" $
235 start *> many (any `minus` void end)
236 lexeme :: CF g a -> CF g a
237 lexeme = rule1 "lexeme" $ \g ->
238 g <* commentable
239 (void $ char ' ')
240 (void $ comment_line (string "--"))
241 (void $ comment_block (string "{-") (string "-}"))
242 parens :: CF g a -> CF g a
243 parens = rule1 "parens" $
244 between
245 (lexeme $ string "(")
246 (lexeme $ string ")")
247 operators
248 :: CF g a -- ^ expression
249 -> CF g (Unifix, a -> a) -- ^ prefix operator
250 -> CF g (Infix , a -> a -> a) -- ^ infix operator
251 -> CF g (Unifix, a -> a) -- ^ postfix operator
252 -> CF g (Either Error_Fixity a)
253 operators g prG iG poG =
254 (evalOpTree <$>)
255 <$> go g prG iG poG
256 where
257 go
258 :: CF g a
259 -> CF g (Unifix, a -> a)
260 -> CF g (Infix , a -> a -> a)
261 -> CF g (Unifix, a -> a)
262 -> CF g (Either Error_Fixity (OpTree a))
263 go = rule4 "operators" $ \aG preG inG postG ->
264 (\pres a posts ->
265 let nod_a =
266 foldr insert_unifix
267 (foldl' (flip insert_unifix) (OpNode0 a) posts)
268 pres
269 in \case
270 Just (in_, b) -> insert_infix nod_a in_ b
271 Nothing -> Right nod_a)
272 <$> star preG
273 <*> aG
274 <*> star postG
275 <*> option Nothing (curry Just <$> inG <*> go aG preG inG postG)
276
277 insert_unifix :: (Unifix, a -> a) -> OpTree a -> OpTree a
278 insert_unifix a@(uni_a@(Prefix prece_a), op_a) nod_b =
279 case nod_b of
280 OpNode0{} -> OpNode1 uni_a op_a nod_b
281 OpNode1 Prefix{} _op_b _nod -> OpNode1 uni_a op_a nod_b
282 OpNode1 uni_b@(Postfix prece_b) op_b nod ->
283 case prece_b `compare` prece_a of
284 GT -> OpNode1 uni_a op_a nod_b
285 EQ -> OpNode1 uni_a op_a nod_b
286 LT -> OpNode1 uni_b op_b $ insert_unifix a nod
287 OpNode2 inf_b op_b l r ->
288 case infix_prece inf_b `compare` prece_a of
289 GT -> OpNode1 uni_a op_a nod_b
290 EQ -> OpNode1 uni_a op_a nod_b
291 LT -> OpNode2 inf_b op_b (insert_unifix a l) r
292 insert_unifix a@(uni_a@(Postfix prece_a), op_a) nod_b =
293 case nod_b of
294 OpNode0{} -> OpNode1 uni_a op_a nod_b
295 OpNode1 uni_b@(Prefix prece_b) op_b nod ->
296 case prece_b `compare` prece_a of
297 GT -> OpNode1 uni_a op_a nod_b
298 EQ -> OpNode1 uni_a op_a nod_b
299 LT -> OpNode1 uni_b op_b $ insert_unifix a nod
300 OpNode1 Postfix{} _op_b _nod -> OpNode1 uni_a op_a nod_b
301 OpNode2 inf_b op_b l r ->
302 case infix_prece inf_b `compare` prece_a of
303 GT -> OpNode1 uni_a op_a nod_b
304 EQ -> OpNode1 uni_a op_a nod_b
305 LT -> OpNode2 inf_b op_b l (insert_unifix a r)
306
307 insert_infix
308 :: OpTree a
309 -> (Infix, a -> a -> a)
310 -> Either Error_Fixity (OpTree a)
311 -> Either Error_Fixity (OpTree a)
312 insert_infix nod_a in_@(inf_a, op_a) e_nod_b = do
313 nod_b <- e_nod_b
314 case nod_b of
315 OpNode0{} -> Right $ OpNode2 inf_a op_a nod_a nod_b
316 OpNode1 uni_b op_b nod ->
317 case unifix_prece uni_b `compare` infix_prece inf_a of
318 EQ -> Right $ OpNode2 inf_a op_a nod_a nod_b
319 GT -> Right $ OpNode2 inf_a op_a nod_a nod_b
320 LT -> do
321 n <- insert_infix nod_a in_ (Right nod)
322 Right $ OpNode1 uni_b op_b n
323 OpNode2 inf_b op_b l r ->
324 case infix_prece inf_b `compare` infix_prece inf_a of
325 GT -> Right $ OpNode2 inf_a op_a nod_a nod_b
326 LT -> do
327 n <- insert_infix nod_a in_ (Right l)
328 Right $ OpNode2 inf_b op_b n r
329 EQ ->
330 let ass = \case
331 AssocL -> L
332 AssocR -> R
333 AssocB lr -> lr in
334 case (ass <$> infix_assoc inf_b, ass <$> infix_assoc inf_a) of
335 (Just L, Just L) -> do
336 n <- insert_infix nod_a in_ (Right l)
337 Right $ OpNode2 inf_b op_b n r
338 (Just R, Just R) ->
339 Right $ OpNode2 inf_a op_a nod_a nod_b
340 _ -> Left $ Error_Fixity_Infix_not_combinable inf_a inf_b
341 -- NOTE: non-associating infix ops
342 -- of the same precedence cannot be mixed.
343 infixrG :: CF g a -> CF g (a -> a -> a) -> CF g a
344 infixrG = rule2 "infixr" $ \g opG ->
345 (\a -> \case
346 Just (op, b) -> a `op` b
347 Nothing -> a)
348 <$> g
349 <*> option Nothing (curry Just <$> opG <*> infixrG g opG)
350 infixlG :: CF g a -> CF g (a -> a -> a) -> CF g a
351 infixlG = rule2 "infixl" $ \g opG ->
352 -- NOTE: infixl uses the same grammar than infixr,
353 -- but build the parsed value by applying
354 -- the operator in the opposite way.
355 ($ id) <$> go g opG
356 where
357 go :: CF g a -> CF g (a -> a -> a) -> CF g ((a -> a) -> a)
358 go g opG =
359 (\a -> \case
360 Just (op, kb) -> \k -> kb (k a `op`)
361 Nothing -> ($ a))
362 <$> g
363 <*> option Nothing (curry Just <$> opG <*> go g opG)
364 inside :: (a -> b) -> CF g begin -> CF g a -> CF g end -> CF g b -> CF g b
365 inside f = rule4 "inside" $ \begin i end n ->
366 (f <$ begin <*> i <* end) <+> n
367 symbol :: String -> CF g String
368 symbol = lexeme . string
369
370 deriving instance Gram_Lexer g => Gram_Lexer (CF g)
371
372 -- ** Type 'Error_Fixity'
373 data Error_Fixity
374 = Error_Fixity_Infix_not_combinable Infix Infix
375 | Error_Fixity_NeedPostfixOrInfix
376 | Error_Fixity_NeedPrefix
377 | Error_Fixity_NeedPostfix
378 | Error_Fixity_NeedInfix
379 deriving (Eq, Show)
380
381 -- ** Type 'NeedFixity'
382 data NeedFixity
383 = NeedPrefix
384 | NeedPostfix
385 | NeedPostfixOrInfix
386 deriving (Eq, Ord, Show)
387
388 -- ** Type 'Fixity'
389 data Fixity a
390 = FixityPrefix Unifix (a -> a)
391 | FixityPostfix Unifix (a -> a)
392 | FixityInfix Infix (a -> a -> a)
393
394 -- ** Type 'Precedence'
395 type Precedence = Int
396
397 -- ** Type 'Associativity'
398 -- type Associativity = LR
399 data Associativity
400 = AssocL -- ^ Associate to the left: @a ¹ b ² c == (a ¹ b) ² c@
401 | AssocR -- ^ Associate to the right: @a ¹ b ² c == a ¹ (b ² c)@
402 | AssocB LR -- ^ Associate to both side, but to 'LR' when reading.
403 deriving (Eq, Show)
404
405 -- ** Type 'Unifix'
406 data Unifix
407 = Prefix { unifix_prece :: Precedence }
408 | Postfix { unifix_prece :: Precedence }
409 deriving (Eq, Show)
410
411 -- ** Type 'Infix'
412 data Infix
413 = Infix
414 { infix_assoc :: Maybe Associativity
415 , infix_prece :: Precedence
416 } deriving (Eq, Show)
417
418 infixL :: Precedence -> Infix
419 infixL = Infix (Just AssocL)
420
421 infixR :: Precedence -> Infix
422 infixR = Infix (Just AssocR)
423
424 infixB :: LR -> Precedence -> Infix
425 infixB = Infix . Just . AssocB
426
427 infixN :: Precedence -> Infix
428 infixN = Infix Nothing
429
430 infixN0 :: Infix
431 infixN0 = infixN 0
432
433 infixN5 :: Infix
434 infixN5 = infixN 5
435
436 infix_paren
437 :: (Semigroup s, IsString s)
438 => (Infix, LR) -> Infix -> s -> s
439 infix_paren (po, lr) op s =
440 if infix_prece op < infix_prece po
441 || infix_prece op == infix_prece po
442 && Bool.not associate
443 then fromString "(" <> s <> fromString ")"
444 else s
445 where
446 associate =
447 case (lr, infix_assoc po) of
448 (_, Just AssocB{}) -> True
449 (L, Just AssocL) -> True
450 (R, Just AssocR) -> True
451 _ -> False
452
453 -- ** Type 'OpTree'
454 data OpTree a
455 = OpNode0 a
456 | OpNode1 Unifix (a -> a) (OpTree a)
457 | OpNode2 Infix (a -> a -> a) (OpTree a) (OpTree a)
458
459 -- | Collapse an 'OpTree'.
460 evalOpTree :: OpTree a -> a
461 evalOpTree (OpNode0 a) = a
462 evalOpTree (OpNode1 _uni op n) = op $ evalOpTree n
463 evalOpTree (OpNode2 _inf op l r) = evalOpTree l `op` evalOpTree r