]> Git — Sourcephile - haskell/symantic.git/blob - symantic-grammar/Language/Symantic/Grammar/Terminal.hs
Clarification : Megaparsec integration.
[haskell/symantic.git] / symantic-grammar / Language / Symantic / Grammar / Terminal.hs
1 {-# OPTIONS_GHC -fno-warn-orphans #-}
2 -- | Symantics for terminal grammars.
3 module Language.Symantic.Grammar.Terminal where
4
5 import Control.Monad
6 import Data.Semigroup (Semigroup(..))
7 import Data.String (IsString(..))
8 import Prelude hiding (any)
9 import qualified Data.Bool as Bool
10 import qualified Data.Char as Char
11 import qualified Data.List as List
12 import qualified Data.Text as Text
13
14 import Language.Symantic.Grammar.Fixity
15 import Language.Symantic.Grammar.EBNF
16
17 -- * Type 'Terminal'
18 -- | Terminal grammar.
19 newtype Terminal g a
20 = Terminal { unTerminal :: g a }
21 deriving (Functor, Gram_Terminal)
22 deriving instance Gram_Rule g => Gram_Rule (Terminal g)
23
24 -- ** Class 'Gram_Terminal'
25 -- | Symantics for terminal grammars.
26 class Gram_Terminal g where
27 any :: g Char
28 but :: Terminal g Char -> Terminal g Char -> Terminal g Char
29 eoi :: g ()
30 char :: Char -> g Char
31 string :: String -> g String
32 unicat :: Unicat -> g Char
33 range :: (Char, Char) -> g Char
34 -- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "")
35 -- string [] = pure []
36 -- string (c:cs) = (:) <$> char c <*> string cs
37 deriving instance Gram_Terminal RuleEBNF
38 instance Gram_Terminal EBNF where
39 any = ebnf_const "_"
40 Terminal (EBNF f) `but` Terminal (EBNF g) =
41 Terminal $ EBNF $ \bo po -> parenInfix po op $
42 f bo (op, SideL) <> " - " <> g bo (op, SideR)
43 where op = infixL 6
44 eoi = ebnf_const "eoi"
45 char = ebnf_const . escape
46 where
47 escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""]
48 escape c = Text.concat ["U+", Text.pack $ show $ Char.ord c]
49 string s =
50 case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of
51 (ps, "") -> raw ps
52 ("", [c]) -> "" <$ char c
53 (ps, [c]) -> "" <$ raw ps <* char c
54 ("", c:rs) -> "" <$ char c <* string rs
55 (ps, c:rs) -> "" <$ raw ps <* char c <* string rs
56 where
57 raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""]
58 unicat = ebnf_const . Text.pack . show
59 range (l, h) = ebnf_const $ Text.concat
60 [ runEBNF $ char l
61 , "…"
62 , runEBNF $ char h
63 ]
64 instance IsString (EBNF String) where
65 fromString = string
66
67 -- *** Type 'Unicat'
68 -- | Unicode category.
69 data Unicat
70 = Unicat_Letter
71 | Unicat_Mark
72 | Unicat_Number
73 | Unicat_Punctuation
74 | Unicat_Symbol
75 | Unicat Char.GeneralCategory
76 deriving (Eq, Show)
77
78 unicode_categories :: Unicat -> [Char.GeneralCategory]
79 unicode_categories c =
80 case c of
81 Unicat_Letter ->
82 [ Char.UppercaseLetter
83 , Char.LowercaseLetter
84 , Char.TitlecaseLetter
85 , Char.ModifierLetter
86 , Char.OtherLetter
87 ]
88 Unicat_Mark ->
89 [ Char.NonSpacingMark
90 , Char.SpacingCombiningMark
91 , Char.EnclosingMark
92 ]
93 Unicat_Number ->
94 [ Char.DecimalNumber
95 , Char.LetterNumber
96 , Char.OtherNumber
97 ]
98 Unicat_Punctuation ->
99 [ Char.ConnectorPunctuation
100 , Char.DashPunctuation
101 , Char.OpenPunctuation
102 , Char.ClosePunctuation
103 , Char.OtherPunctuation
104 ]
105 Unicat_Symbol ->
106 [ Char.MathSymbol
107 , Char.CurrencySymbol
108 , Char.ModifierSymbol
109 , Char.OtherSymbol
110 ]
111 Unicat cat -> [cat]