]> Git — Sourcephile - haskell/symantic.git/blob - symantic-grammar/Language/Symantic/Grammar/Terminal.hs
Fix lexeme to progress over indented lines.
[haskell/symantic.git] / symantic-grammar / Language / Symantic / Grammar / Terminal.hs
1 {-# OPTIONS_GHC -fno-warn-orphans #-}
2 -- | This module defines symantics
3 -- for terminal grammars.
4 module Language.Symantic.Grammar.Terminal where
5
6 import Control.Monad
7 import qualified Data.Bool as Bool
8 import qualified Data.Char as Char
9 import qualified Data.List as List
10 import Data.Semigroup (Semigroup(..))
11 import Data.String (IsString(..))
12 import qualified Data.Text as Text
13 import Prelude hiding (any)
14
15 import Language.Symantic.Grammar.EBNF
16
17 -- * Type 'Terminal'
18 -- | Terminal grammar.
19 newtype Terminal g a
20 = Terminal { unTerminal :: g a }
21 deriving (Functor, Gram_Terminal)
22 deriving instance Gram_Rule g => Gram_Rule (Terminal g)
23
24 -- ** Class 'Gram_Terminal'
25 -- | Symantics for terminal grammars.
26 class Gram_Terminal g where
27 any :: g Char
28 but :: Terminal g Char -> Terminal g Char -> Terminal g Char
29 eoi :: g ()
30 char :: Char -> g Char
31 string :: String -> g String
32 unicat :: Unicat -> g Char
33 range :: (Char, Char) -> g Char
34 -- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "")
35 -- string [] = pure []
36 -- string (c:cs) = (:) <$> char c <*> string cs
37 deriving instance Gram_Terminal RuleDef
38 instance Gram_Terminal EBNF where
39 any = ebnf_const "_"
40 Terminal (EBNF f) `but` Terminal (EBNF g) =
41 Terminal $ EBNF $ \bo po -> infix_paren po op $
42 f bo (op, L) <> " - " <> g bo (op, R)
43 where op = infixL 6
44 eoi = ebnf_const "eoi"
45 char = ebnf_const . escape
46 where
47 escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""]
48 escape c = Text.concat ["U+", Text.pack $ show $ Char.ord c]
49 string s =
50 case List.break (\c -> Bool.not (Char.isPrint c) || c == '"') s of
51 (ps, "") -> raw ps
52 ("", [c]) -> "" <$ char c
53 (ps, [c]) -> "" <$ raw ps <* char c
54 ("", c:rs) -> "" <$ char c <* string rs
55 (ps, c:rs) -> "" <$ raw ps <* char c <* string rs
56 where
57 raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""]
58 unicat = ebnf_const . Text.pack . show
59 range (l, h) = ebnf_const $ Text.concat
60 [ runEBNF $ char l
61 , "…"
62 , runEBNF $ char h
63 ]
64 instance IsString (EBNF String) where
65 fromString = string
66
67 -- *** Type 'Unicat'
68 -- | Unicode category.
69 data Unicat
70 = Unicat_Letter
71 | Unicat_Mark
72 | Unicat_Number
73 | Unicat_Punctuation
74 | Unicat_Symbol
75 | Unicat Char.GeneralCategory
76 deriving (Eq, Show)
77
78 unicode_categories :: Unicat -> [Char.GeneralCategory]
79 unicode_categories c =
80 case c of
81 Unicat_Letter ->
82 [ Char.UppercaseLetter
83 , Char.LowercaseLetter
84 , Char.TitlecaseLetter
85 , Char.ModifierLetter
86 , Char.OtherLetter
87 ]
88 Unicat_Mark ->
89 [ Char.NonSpacingMark
90 , Char.SpacingCombiningMark
91 , Char.EnclosingMark
92 ]
93 Unicat_Number ->
94 [ Char.DecimalNumber
95 , Char.LetterNumber
96 , Char.OtherNumber
97 ]
98 Unicat_Punctuation ->
99 [ Char.ConnectorPunctuation
100 , Char.DashPunctuation
101 , Char.OpenPunctuation
102 , Char.ClosePunctuation
103 , Char.OtherPunctuation
104 ]
105 Unicat_Symbol ->
106 [ Char.MathSymbol
107 , Char.CurrencySymbol
108 , Char.ModifierSymbol
109 , Char.OtherSymbol
110 ]
111 Unicat cat -> [cat]