]> Git — Sourcephile - haskell/symantic.git/blob - Language/Symantic/Parsing/Test.hs
Add Parsing.Grammar.
[haskell/symantic.git] / Language / Symantic / Parsing / Test.hs
1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE ConstrainedClassMethods #-}
3 {-# LANGUAGE ConstraintKinds #-}
4 {-# LANGUAGE NoMonomorphismRestriction #-}
5 {-# LANGUAGE PatternGuards #-}
6 {-# LANGUAGE TupleSections #-}
7 {-# LANGUAGE TypeInType #-}
8 {-# LANGUAGE UndecidableInstances #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
11 module Parsing.Test where
12
13 import qualified Data.Char as Char
14 import qualified Data.MonoTraversable as MT
15 import qualified Data.Kind as Kind
16 import Data.Text (Text)
17 import qualified Data.Text as Text
18 import qualified Data.List as List
19 import Data.String (IsString(..))
20 import Data.Proxy
21
22 import Language.Symantic.Parsing
23 import Language.Symantic.Typing
24 import Language.Symantic.Compiling (TokenT(..))
25
-- * Type 'Syntax'
-- | A rose tree of syntax: a node label of type @a@
-- (in practice 'Text') applied to zero or more sub-syntaxes.
data Syntax a
  = Syntax a [Syntax a]
  deriving (Eq)
-- | Merge two syntaxes under an application (@\" \"@) node,
-- treating the empty syntax as identity.
--
-- NOTE(review): on GHC >= 8.4 this needs a companion 'Semigroup'
-- instance ('mappend' alone no longer compiles) — confirm target GHC.
-- NOTE(review): 'mappend' is not associative in general
-- (@(x <> y) <> z@ and @x <> (y <> z)@ build differently shaped trees);
-- presumably it is only used to merge metadata where the exact
-- shape does not matter — confirm.
instance Monoid (Syntax Text) where
  mempty = Syntax "" []
  -- The identity element disappears on either side.
  mappend (Syntax "" []) x = x
  mappend x (Syntax "" []) = x
  mappend x y = Syntax " " [x, y]
35
-- * Class 'Sy'
-- | Render a constant @c@ as 'Syntax', dispatching on the KIND of @c@:
-- ground types ('Kind.Type') yield a leaf, type constructors
-- (@a -> b@) yield a function awaiting argument syntaxes.
-- The name comes from 'show_const' on the injected constant.
class Sy c where
  type SyT c
  sy ::
    ( Show_Const '[Proxy c]
    , Inj_Const '[Proxy c] c
    ) => SyT c
-- | Ground type: a leaf named after the constant.
instance Sy (c::Kind.Type) where
  type SyT c = Syntax Text
  sy = Syntax (Text.pack $ show_const (inj_const::Const '[Proxy c] c)) []
-- | Type constructor: a node still awaiting its argument syntaxes.
instance Sy (c::a -> b) where
  type SyT c = [Syntax Text] -> Syntax Text
  sy = Syntax (Text.pack $ show_const (inj_const::Const '[Proxy c] c))
49
-- | Custom 'Show' instance a little bit more readable
-- than the automatically derived one.
--
-- NOTE(review): parenthesization uses @p <= prec@ rather than the
-- conventional @p > prec@ of 'showsPrec' — presumably intentional
-- for this pretty-printer, but confirm against expected output.
instance Show (Syntax Text) where
  showsPrec p ast@(Syntax name args) =
    let n = Text.unpack name in
    case ast of
      -- Leaf: just the name, never parenthesized.
      Syntax _ [] -> showString n
      -- Partially applied arrow: "(->) a".
      Syntax "(->)" [a] ->
        showParen (p Prelude.<= prec_arrow) $
        showString (""++n++" ") .
        showsPrec prec_arrow a
      -- Saturated arrow: "a -> b", right operand at higher precedence.
      Syntax "(->)" [a, b] ->
        showParen (p Prelude.<= prec_arrow) $
        showsPrec prec_arrow a .
        showString (" -> ") .
        showsPrec (prec_arrow Prelude.+ 1) b
      -- Lambda: "\(var:typ) -> body".
      Syntax "\\" [var, typ, body] ->
        showParen (p Prelude.<= prec_lambda) $
        showString ("\\(") .
        showsPrec prec_lambda var .
        showString (":") .
        showsPrec prec_lambda typ .
        showString (") -> ") .
        showsPrec prec_lambda body
      -- Application spine: function followed by space-separated arguments.
      Syntax " " (fun:as) ->
        showParen (p Prelude.<= prec_app) $
        showsPrec prec_dollar fun .
        List.foldl
          (\acc arg ->
            acc . showString (" ") .
            showsPrec prec_dollar arg)
          (showString ("")) as
      -- Dollar application: "fun $ arg".
      Syntax "$" [fun, arg] ->
        showParen (p Prelude.<= prec_dollar) $
        showsPrec prec_dollar fun .
        showString (" $ ") .
        showsPrec prec_dollar arg
      -- Fallback: name then 'show'n arguments.
      _ ->
        showParen (p Prelude.<= prec_app) $
        showString n .
        showString " " .
        showString (List.unwords $ show Prelude.<$> args)
    where
      prec_arrow = 1
      prec_lambda = 1
      prec_dollar = 1
      prec_app = 10
97
-- * Class 'Tokenize'
-- | A minimal parser, dispatched in many class instances.
-- One could also have used a framework like megaparsec,
-- and many class instances to handle 'TokenT's
-- (and be able to handle the fact that
-- they may have a different number of arguments).
-- Here 'TokenizeR' tries each 'TokenizeT'
-- up until one works, but this could
-- also be made more efficient by
-- building a 'Map' instead of these nested cases.
type Tokenize ast meta ts = TokenizeR ast meta ts ts

-- | Entry point: dispatch @ast@ over every token type in @ts@.
tokenize :: forall meta ast ts. Tokenize ast meta ts
  => ast -> Either (Error_Syntax ast) (EToken meta ts)
tokenize = tokenizeR (Proxy::Proxy ts)
113
-- ** Class 'TokenizeR'
-- | Try the token parsers of the @rs@ type-list in order,
-- producing a token over the full @ts@ list.
class TokenizeR ast meta ts rs where
  tokenizeR :: Proxy rs -> ast -> Either (Error_Syntax ast) (EToken meta ts)
-- | End of the type-list: no 'TokenizeT' matched, so fall back to
-- parsing a term variable, then left-associatively apply it to the
-- remaining argument syntaxes, merging metadata as we go.
instance
  ( TokenizeT ast meta ts (Proxy Token_Var)
  , TokenizeR ast meta ts ts
  , Inj_Token meta ts (->)
  , Monoid meta
  ) => TokenizeR ast meta ts '[] where
  tokenizeR _rs ast =
    case tokenizeT (Proxy::Proxy (Proxy Token_Var)) ast of
      Nothing -> Left $ Error_Syntax_unsupported ast
      Just (Left err) -> Left err
      Just (Right (as, tok)) ->
        -- Fold the leftover syntaxes into nested applications,
        -- each node carrying the merged metadata of function and argument.
        List.foldl (\mf ma -> do
          a <- tokenize ma
          f <- mf
          Right $ EToken $ inj_token (meta_of f `mappend` meta_of a) $
            Token_Term_App f a
          ) (Right tok) as
-- | Cons case: try the head parser @t@; on 'Nothing' recurse on @rs@.
instance
  ( TokenizeT ast meta ts t
  , TokenizeR ast meta ts ts
  , TokenizeR ast meta ts rs
  , Inj_Token meta ts (->)
  ) => TokenizeR ast meta ts (t ': rs) where
  tokenizeR _ ast =
    case tokenizeT (Proxy::Proxy t) ast of
      Nothing -> tokenizeR (Proxy::Proxy rs) ast
      Just (Left err) -> Left err
      Just (Right (as, tok)) ->
        -- NOTE(review): unlike the '[] instance above, the argument's
        -- metadata is NOT merged here (the mappend is commented out) —
        -- confirm whether this asymmetry is intentional.
        List.foldl (\mf ma -> do
          a <- tokenize ma
          f <- mf
          Right $ EToken $ inj_token (meta_of f {-`mappend` meta_of a-}) $
            Token_Term_App f a
          ) (Right tok) as
151
-- ** Type 'Token_Var'
-- | Uninhabited tag used to request the term-variable parser.
data Token_Var
instance
  Inj_Token (Syntax Text) ts (->) =>
  TokenizeT (Syntax Text) (Syntax Text) ts (Proxy Token_Var) where
  tokenizeT _t meta@(Syntax x as)
    -- A variable is a lowercase letter followed only by letters or digits.
    | Just (x0, xs) <- Text.uncons x
    , Char.isLetter x0 && Char.isLower x0
    , MT.oall (\c -> Char.isLetter c || Char.isNumber c) xs
    = Just $ Right $ (as,) $ EToken $ inj_token meta $
      Token_Term_Var x
  tokenizeT _t _sy = Nothing
164
-- ** Class 'TokenizeT'
-- | Parse one token type @t@ out of @ast@:
-- 'Nothing' when @t@ does not match at all, otherwise the leftover
-- argument syntaxes paired with the parsed token (or a syntax error).
class TokenizeT ast meta ts t where
  tokenizeT :: Proxy t -> ast
    -> Maybe ( Either (Error_Syntax ast)
               ([ast], EToken meta ts) )
-- | Core term formers: lambda-abstraction, application and let-binding.
instance
  ( Inj_Token (Syntax Text) ts (->)
  , Tokenize (Syntax Text) (Syntax Text) ts ) =>
  TokenizeT (Syntax Text) (Syntax Text) ts (Proxy (->)) where
  -- @\\ var type body@ : abstraction with an explicit argument type.
  tokenizeT _t (Syntax "\\" (Syntax n [] : ast_ty : ast_te : as)) = Just $ do
    tok_ty <- tokenize_type ast_ty
    tok_te <- tokenize ast_te
    Right $ (as,) $ EToken $
      inj_token (Syntax "\\" [Syntax n [], ast_ty, ast_te]) $
      Token_Term_Abst n tok_ty tok_te
  -- @" " f x@ : application node.
  tokenizeT _t (Syntax " " (ast_f : ast_x : as)) = Just $ do
    tok_f <- tokenize ast_f
    tok_x <- tokenize ast_x
    Right $ (as,) $ EToken $
      inj_token (Syntax " " [ast_f, ast_x]) $
      Token_Term_App tok_f tok_x
  -- @let var bound body@.
  tokenizeT _t (Syntax "let" (Syntax n [] : ast_te : ast_in : as)) = Just $ do
    tok_te <- tokenize ast_te
    tok_in <- tokenize ast_in
    Right $ (as,) $ EToken $
      inj_token (Syntax "let" [Syntax n [], ast_te, ast_in]) $
      Token_Term_Let n tok_te tok_in
  tokenizeT _t _sy = Nothing
-- | Integer literals: @int \<literal\>@, 'read' back from the label.
instance
  ( Inj_Token (Syntax Text) ts Int
  {-, Tokenize (Syntax Text) (Syntax Text) ts-} ) =>
  TokenizeT (Syntax Text) (Syntax Text) ts (Proxy Int) where
  tokenizeT _t (Syntax "int" (ast_i : as)) = Just $ do
    i <- read_syntax ast_i
    Right $ (as,) $ EToken $ inj_token (Syntax "int" [ast_i]) $
      Token_Term_Int i
  tokenizeT _t _sy = Nothing
-- | List tokens: @list \<type\> \<elem\>...@ and @zipWith \<fun\>@.
instance
  ( Inj_Token (Syntax Text) ts []
  , Tokenize (Syntax Text) (Syntax Text) ts ) =>
  TokenizeT (Syntax Text) (Syntax Text) ts (Proxy []) where
  -- "list" consumes ALL remaining syntaxes as elements,
  -- hence returns an empty leftover list.
  tokenizeT _t meta@(Syntax "list" (ast_ty : ast_as)) = Just $ do
    typ <- tokenize_type ast_ty
    as <- tokenize `mapM` ast_as
    Right $ ([],) $ EToken $ inj_token meta $
      Token_Term_List_list typ as
  tokenizeT _t (Syntax "zipWith" (ast_f : as)) = Just $ do
    f <- tokenize ast_f
    Right $ (as,) $ EToken $ inj_token (Syntax "zipWith" [ast_f]) $
      Token_Term_List_zipWith f
  tokenizeT _t _sy = Nothing
-- | Character tokens: @char \<literal\>@ and the @Char.toUpper@ primitive.
instance
  ( Inj_Token (Syntax Text) ts Char ) =>
  TokenizeT (Syntax Text) (Syntax Text) ts (Proxy Char) where
  tokenizeT _t (Syntax "char" (ast_c : as)) = Just $ do
    c <- read_syntax ast_c
    Right $ (as,) $ EToken $ inj_token (Syntax "char" [ast_c]) $
      Token_Term_Char c
  tokenizeT _t (Syntax "Char.toUpper" as) = Just $
    Right $ (as,) $ EToken $ inj_token (Syntax "Char.toUpper" []) $
      Token_Term_Char_toUpper
  tokenizeT _t _sy = Nothing
-- | Text literals: @text \<literal\>@, 'read' back from the label.
instance
  ( Inj_Token (Syntax Text) ts Text ) =>
  TokenizeT (Syntax Text) (Syntax Text) ts (Proxy Text) where
  tokenizeT _t (Syntax "text" (ast_t : as)) = Just $ do
    t <- read_syntax ast_t
    Right $ (as,) $ EToken $ inj_token (Syntax "text" [ast_t]) $
      Token_Term_Text t
  tokenizeT _t _sy = Nothing
-- | Maybe tokens: @Just \<term\>@.
--
-- NOTE(review): only 'Just' is handled; there is no @Nothing@ form —
-- presumably not needed by the tests, but confirm this is intentional.
instance
  ( Inj_Token (Syntax Text) ts Maybe
  , Tokenize (Syntax Text) (Syntax Text) ts ) =>
  TokenizeT (Syntax Text) (Syntax Text) ts (Proxy Maybe) where
  tokenizeT _t (Syntax "Just" (ast_a : as)) = Just $ do
    a <- tokenize ast_a
    Right $ (as,) $ EToken $ inj_token (Syntax "Just" [ast_a]) $
      Token_Term_Maybe_Just a
  tokenizeT _t _sy = Nothing
-- | Pair tokens: @(,) a b@, @fst t@ and @snd t@.
instance
  ( Inj_Token (Syntax Text) ts (,)
  , Tokenize (Syntax Text) (Syntax Text) ts ) =>
  TokenizeT (Syntax Text) (Syntax Text) ts (Proxy (,)) where
  tokenizeT _t (Syntax "(,)" (ast_a : ast_b : as)) = Just $ do
    a <- tokenize ast_a
    b <- tokenize ast_b
    Right $ (as,) $ EToken $ inj_token (Syntax "(,)" [ast_a, ast_b]) $
      Token_Term_Tuple2 a b
  tokenizeT _t (Syntax "fst" (ast_t : as)) = Just $ do
    t <- tokenize ast_t
    Right $ (as,) $ EToken $ inj_token (Syntax "fst" [ast_t]) $
      Token_Term_Tuple2_fst t
  tokenizeT _t (Syntax "snd" (ast_t : as)) = Just $ do
    t <- tokenize ast_t
    Right $ (as,) $ EToken $ inj_token (Syntax "snd" [ast_t]) $
      Token_Term_Tuple2_snd t
  tokenizeT _t _sy = Nothing
-- | Num tokens: partially applied addition @(+) x@.
instance
  ( Inj_Token (Syntax Text) ts Num
  , Tokenize (Syntax Text) (Syntax Text) ts ) =>
  TokenizeT (Syntax Text) (Syntax Text) ts (Proxy Num) where
  tokenizeT _t (Syntax "(+)" (ast_x : as)) = Just $ do
    x <- tokenize ast_x
    Right $ (as,) $ EToken $ inj_token (Syntax "(+)" [ast_x]) $
      Token_Term_Num_add x
  tokenizeT _t _sy = Nothing
-- | Monoid tokens: partially applied @mappend x@.
instance
  ( Inj_Token (Syntax Text) ts Monoid
  , Tokenize (Syntax Text) (Syntax Text) ts ) =>
  TokenizeT (Syntax Text) (Syntax Text) ts (Proxy Monoid) where
  tokenizeT _t (Syntax "mappend" (ast_x : as)) = Just $ do
    x <- tokenize ast_x
    Right $ (as,) $ EToken $ inj_token (Syntax "mappend" [ast_x]) $
      Token_Term_Monoid_mappend x
  tokenizeT _t _sy = Nothing
280
-- | Parse a type name: either a capitalized all-letter identifier
-- or a name made only of symbol\/punctuation characters
-- (e.g. @(->)@), recursively tokenizing its argument syntaxes.
-- Anything else (including an empty name) is unsupported.
tokenize_type
  :: Inj_Token (Syntax Text) ts Token_Type
  => Syntax Text
  -> Either (Error_Syntax (Syntax Text)) (EToken (Syntax Text) ts)
tokenize_type meta@(Syntax n as) =
  case Text.uncons n of
    Just (c0, _) | looksLikeType c0 ->
      EToken . inj_token meta . Token_Type n
        <$> traverse tokenize_type as
    _ -> Left $ Error_Syntax_unsupported meta
  where
    -- A type name starts uppercase and is all letters,
    -- or consists entirely of symbols/punctuation.
    looksLikeType c0 =
      (Char.isUpper c0 && MT.oall Char.isLetter n)
      || MT.oall (\x -> Char.isSymbol x || Char.isPunctuation x) n
292
-- * Type 'Error_Syntax'
-- | Errors raised while tokenizing a 'Syntax'.
data Error_Syntax ast
  = Error_Syntax_read ast Text
    -- ^ the literal label could not be 'read' back
  | Error_Syntax_unsupported ast
    -- ^ no token parser matched this syntax
  deriving (Eq, Show)
298
-- | Smart constructors building the concrete 'Syntax' nodes
-- recognized by the 'TokenizeT' instances above.
syVar name = Syntax name []
syLam v ty body = Syntax "\\" [syVar v, ty, body]
syApp fun arg = Syntax " " [fun, arg]
syLet v bound body = Syntax "let" [syVar v, bound, body]
303
-- | Literal syntax: a node named after the lower-cased constant
-- of type @c@, wrapping the 'show'n value as its single child.
--
-- NOTE(review): the pattern @c:cs@ is partial — it crashes if
-- 'show_const' ever returns an empty string; presumably it never
-- does for a registered constant, but confirm.
syLit :: forall c.
  ( Show_Const '[Proxy c]
  , Inj_Const '[Proxy c] c
  , Show c
  ) => c -> Syntax Text
syLit x = Syntax n [Syntax (Text.pack $ show x) []]
  where
    c:cs = show_const (inj_const::Const '[Proxy c] c)
    n = Text.cons (Char.toLower c) $ Text.pack cs
313
-- | Function-type syntax node over its argument types.
syFun :: IsString a => [Syntax a] -> Syntax a
syFun args = Syntax "(->)" args

-- | Right-associative infix shorthand for a two-argument 'syFun'.
(.>) :: IsString a => Syntax a -> Syntax a -> Syntax a
dom .> cod = syFun [dom, cod]
infixr 3 .>
320
-- | 'read' the node's label, succeeding only on a full, unambiguous
-- parse of a childless node; anything else is an 'Error_Syntax_read'.
read_syntax :: Read a => Syntax Text -> Either (Error_Syntax (Syntax Text)) a
read_syntax ast@(Syntax t as)
  | [(x, "")] <- reads $ Text.unpack t
  , List.null as = Right x
  | otherwise = Left $ Error_Syntax_read ast t
326
-- | Keep the 'Right' payload, discarding any 'Left'.
maybeRight :: Either l r -> Maybe r
maybeRight = either (const Nothing) Just