]> Git — Sourcephile - haskell/symantic.git/blob - symantic-grammar/Language/Symantic/Grammar/Terminal.hs
Sync with ghc-8.2.2 and megaparsec-6.3.0.
[haskell/symantic.git] / symantic-grammar / Language / Symantic / Grammar / Terminal.hs
1 {-# OPTIONS_GHC -fno-warn-orphans #-}
2 -- | Symantics for terminal grammars.
3 module Language.Symantic.Grammar.Terminal where
4
5 import Data.Semigroup (Semigroup(..))
6 import Data.String (IsString(..))
7 import qualified Data.Bool as Bool
8 import qualified Data.Char as Char
9 import qualified Data.List as List
10 import qualified Data.Text as Text
11 import qualified Data.Text.Lazy as TL
12
13 import Language.Symantic.Grammar.Fixity
14 import Language.Symantic.Grammar.EBNF
15
16 -- * Type 'Terminal'
17 -- | Terminal grammar.
18 newtype Terminal g a
19 = Terminal { unTerminal :: g a }
20 deriving (Functor, Gram_Char, Gram_String)
21 deriving instance Gram_Rule g => Gram_Rule (Terminal g)
22
23 -- ** Class 'Gram_Char'
24 -- | Symantics for terminal grammars.
25 class Gram_Rule g => Gram_Char g where
26 any :: g Char
27 but :: Terminal g Char -> Terminal g Char -> Terminal g Char
28 eoi :: g ()
29 eol :: g Char
30 space :: g Char
31 char :: Char -> g Char
32 unicat :: Unicat -> g Char
33 range :: (Char, Char) -> g Char
34 eol = rule "NewLine" $ char '\n'
35 space = rule "Space" $ char ' '
36 deriving instance Gram_Char RuleEBNF
37 instance Gram_Char EBNF where
38 any = ebnf_const "_"
39 Terminal (EBNF f) `but` Terminal (EBNF g) =
40 Terminal $ EBNF $ \bo po -> parenInfix po op $
41 f bo (op, SideL) <> " - " <> g bo (op, SideR)
42 where op = infixL 6
43 eoi = ebnf_const "eoi"
44 eol = ebnf_const "↵"
45 space = ebnf_const "␣"
46 char = ebnf_const . escape
47 where
48 escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""]
49 escape c = Text.concat ["U+", Text.pack $ show $ Char.ord c]
50 unicat = ebnf_const . Text.pack . show
51 range (l, h) = ebnf_const $ Text.concat
52 [ runEBNF $ char l
53 , "…"
54 , runEBNF $ char h
55 ]
56
57 -- *** Type 'Unicat'
58 -- | Unicode category.
59 data Unicat
60 = Unicat_Letter
61 | Unicat_Mark
62 | Unicat_Number
63 | Unicat_Punctuation
64 | Unicat_Symbol
65 | Unicat Char.GeneralCategory
66 deriving (Eq, Show)
67
68 unicode_categories :: Unicat -> [Char.GeneralCategory]
69 unicode_categories c =
70 case c of
71 Unicat_Letter ->
72 [ Char.UppercaseLetter
73 , Char.LowercaseLetter
74 , Char.TitlecaseLetter
75 , Char.ModifierLetter
76 , Char.OtherLetter
77 ]
78 Unicat_Mark ->
79 [ Char.NonSpacingMark
80 , Char.SpacingCombiningMark
81 , Char.EnclosingMark
82 ]
83 Unicat_Number ->
84 [ Char.DecimalNumber
85 , Char.LetterNumber
86 , Char.OtherNumber
87 ]
88 Unicat_Punctuation ->
89 [ Char.ConnectorPunctuation
90 , Char.DashPunctuation
91 , Char.OpenPunctuation
92 , Char.ClosePunctuation
93 , Char.OtherPunctuation
94 ]
95 Unicat_Symbol ->
96 [ Char.MathSymbol
97 , Char.CurrencySymbol
98 , Char.ModifierSymbol
99 , Char.OtherSymbol
100 ]
101 Unicat cat -> [cat]
102
103 -- ** Class 'Gram_String'
104 class Functor g => Gram_String g where
105 string :: String -> g String
106 {-
107 string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "")
108 string [] = pure []
109 string (c:cs) = (:) <$> char c <*> string cs
110 -}
111 text :: Text.Text -> g Text.Text
112 textLazy :: TL.Text -> g TL.Text
113 text t = Text.pack <$> string (Text.unpack t)
114 textLazy t = TL.pack <$> string (TL.unpack t)
115 deriving instance Gram_String RuleEBNF
116 instance Gram_String EBNF where
117 string s =
118 case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of
119 (ps, "") -> raw ps
120 ("", [c]) -> "" <$ char c
121 (ps, [c]) -> "" <$ raw ps <* char c
122 ("", c:rs) -> "" <$ char c <* string rs
123 (ps, c:rs) -> "" <$ raw ps <* char c <* string rs
124 where
125 raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""]
126 instance IsString (EBNF String) where
127 fromString = string