]> Git — Sourcephile - comptalang.git/blob - lcc/Hcompta/LCC/Megaparsec.hs
Fix balance tests to use new TreeMap.
[comptalang.git] / lcc / Hcompta / LCC / Megaparsec.hs
1 {-# LANGUAGE ViewPatterns #-}
2 {-# LANGUAGE UndecidableInstances #-}
3 {-# OPTIONS_GHC -fno-warn-orphans #-}
4 -- | Symantic and LCC grammar instances for Megaparsec
5 module Hcompta.LCC.Megaparsec where
6
7 import Control.Applicative (Applicative(..))
8 import Control.Monad (Monad(..))
9 import Control.Monad.IO.Class (MonadIO(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Either (Either(..))
13 import Data.Eq (Eq(..))
14 import Data.Foldable
15 import Data.Function (($), (.))
16 import Data.Functor (Functor(..), (<$>))
17 import Data.Int (Int)
18 import Data.List ((++))
19 import Data.List.NonEmpty (NonEmpty(..))
20 import Data.Maybe (Maybe(..))
21 import Data.Monoid (Monoid(..))
22 import Data.Ord (Ord(..))
23 import Data.Set (Set)
24 import Data.String (IsString(..), String)
25 import Data.Text (Text)
26 import Data.Typeable ()
27 import Data.Word (Word)
28 import Prelude (pred, succ, (-), error)
29 import System.IO (IO)
30 import Text.Show (Show(..))
31 import qualified Control.Applicative as Alt
32 import qualified Control.Exception.Safe as Exn
33 import qualified Control.Monad.Classes as MC
34 import qualified Data.ByteString as BS
35 import qualified Data.Char as Char
36 import qualified Data.List as List
37 import qualified Data.List.NonEmpty as NE
38 import qualified Data.Set as Set
39 import qualified Data.Strict as S
40 import qualified Data.Text as Text
41 import qualified Data.Text.Encoding as Enc
42 import qualified System.Directory as IO
43 import qualified Text.Megaparsec as P
44 import qualified Text.Megaparsec.Prim as P
45
46 import Language.Symantic.Grammar as Sym
47 import qualified Language.Symantic as Sym
48 import qualified Language.Symantic.Document as D
49
50 import Hcompta.LCC.Amount
51 import Hcompta.LCC.Chart
52 import Hcompta.LCC.Posting
53 import Hcompta.LCC.Journal
54 import Hcompta.LCC.Grammar as LCC
55 import Hcompta.LCC.Document
56
57 import Debug.Trace (trace)
58 import Data.Semigroup ((<>))
59 dbg :: Show a => [Char] -> a -> a
60 dbg msg x = trace (msg <> " = " <> show x) x
61
62 -- | Convenient converter.
63 sourcePos :: P.SourcePos -> SourcePos
64 sourcePos (P.SourcePos p l c) = SourcePos p (PosFile $ P.unPos l) (PosFile $ P.unPos c)
65
66 -- * Type 'ParsecC'
67 -- | Convenient alias for defining instances involving 'P.ParsecT'.
68 type ParsecC e s = (P.Token s ~ Char, P.Stream s, P.ErrorComponent e)
69 instance ParsecC e s => IsString (P.ParsecT e s m [Char]) where
70 fromString = P.string
71
72 --
73 -- Readers
74 --
75
76 -- NonEmpty SourcePos
77 instance ParsecC e s => Sym.Gram_Reader (NonEmpty SourcePos) (P.ParsecT e s m) where
78 g_ask_before g = do
79 s <- (sourcePos <$>) . P.statePos <$> P.getParserState
80 ($ s) <$> g
81 g_ask_after g = do
82 f <- g
83 f . (sourcePos <$>) . P.statePos <$> P.getParserState
84 type instance MC.CanDo (P.ParsecT e s m) (MC.EffReader (NonEmpty SourcePos)) = 'True
85 instance ParsecC e s => MC.MonadReaderN 'MC.Zero (NonEmpty SourcePos) (P.ParsecT e s m) where
86 askN _n = (sourcePos <$>) . P.statePos <$> P.getParserState
87 -- SourcePos
88 instance ParsecC e s => Sym.Gram_Reader SourcePos (P.ParsecT e s m) where
89 g_ask_before g = do
90 s <- sourcePos <$> P.getPosition
91 ($ s) <$> g
92 g_ask_after g = do
93 f <- g
94 f . sourcePos <$> P.getPosition
95 type instance MC.CanDo (P.ParsecT e s m) (MC.EffReader SourcePos) = 'True
96 instance ParsecC e s => MC.MonadReaderN 'MC.Zero SourcePos (P.ParsecT e s m) where
97 askN _n = sourcePos <$> P.getPosition
98 -- ()
99 instance ParsecC e s => Sym.Gram_Reader () (P.ParsecT e s m) where
100 g_ask_before = fmap ($ ())
101 g_ask_after = fmap ($ ())
102 -- S.Either Exn.IOException CanonFile
103 instance (ParsecC e s, MonadIO m) => Sym.Gram_Reader (S.Either Exn.IOException CanonFile) (P.ParsecT e s m) where
104 g_ask_before g = do
105 sn <- P.sourceName <$> P.getPosition
106 f <- g
107 liftIO $ Exn.catch
108 (f . S.Right . CanonFile . PathFile <$> IO.canonicalizePath sn)
109 (\exn -> return $ f $ S.Left exn)
110 g_ask_after g = do
111 f <- g
112 sn <- P.sourceName <$> P.getPosition
113 liftIO $ Exn.catch
114 (f . S.Right . CanonFile . PathFile <$> IO.canonicalizePath sn)
115 (\exn -> return $ f $ S.Left exn)
116 type instance MC.CanDo (P.ParsecT e s m) (MC.EffReader (S.Either Exn.IOException CanonFile)) = 'True
117 instance (ParsecC e s, MonadIO m) => MC.MonadReaderN 'MC.Zero (S.Either Exn.IOException CanonFile) (P.ParsecT e s m) where
118 askN _n = do
119 sn <- P.sourceName <$> P.getPosition
120 liftIO $ Exn.catch
121 (S.Right . CanonFile . PathFile <$> IO.canonicalizePath sn)
122 (\exn -> return $ S.Left exn)
123
124 --
125 -- States
126 --
127
128 -- st
129 type instance MC.CanDo (P.ParsecT e s m) (MC.EffState st) = 'False
130 instance (Monad m, MC.MonadState st m) => Sym.Gram_State st m where
131 g_state_before g = do
132 s <- MC.get
133 f <- g
134 let (s', a) = f s
135 MC.put s'
136 return a
137 g_state_after g = do
138 f <- g
139 s <- MC.get
140 let (s', a) = f s
141 MC.put s'
142 return a
143 g_get_before g = do
144 s <- MC.get
145 f <- g
146 return (f s)
147 g_get_after g = do
148 f <- g
149 s <- MC.get
150 return (f s)
151 g_put g = do
152 (s, a) <- g
153 MC.put s
154 return a
155
156 --
157 -- Sym instances
158 --
159 instance (ParsecC e s, Show err) => Sym.Gram_Error err (P.ParsecT e s m) where
160 g_catch me = do
161 e <- me
162 case e of
163 Left err -> fail $ show err
164 Right a -> return a
165 instance ParsecC e s => Sym.Gram_Rule (P.ParsecT e s m) where
166 rule = P.label . Text.unpack
167 instance ParsecC e s => Sym.Gram_Terminal (P.ParsecT e s m) where
168 any = P.anyChar
169 eoi = P.eof
170 char = P.char
171 string = P.string
172 unicat cat = P.satisfy $ (`elem` cats) . Char.generalCategory
173 where cats = unicode_categories cat
174 range (l, h) = P.satisfy $ \c -> l <= c && c <= h
175 Terminal f `but` Terminal p = Terminal $ P.notFollowedBy (P.try p) *> f
176 instance ParsecC e s => Sym.Gram_Alt (P.ParsecT e s m) where
177 empty = Alt.empty
178 (<+>) = (Alt.<|>)
179 choice = P.choice
180 instance ParsecC e s => Sym.Gram_Try (P.ParsecT e s m) where
181 try = P.try
182 instance ParsecC e s => Sym.Gram_RegR (P.ParsecT e s m) where
183 Terminal f .*> Reg x = Reg $ f <*> x
184 instance ParsecC e s => Sym.Gram_RegL (P.ParsecT e s m) where
185 Reg f <*. Terminal x = Reg $ f <*> x
186 instance ParsecC e s => Sym.Gram_App (P.ParsecT e s m) where
187 between = P.between
188 instance ParsecC e s => Sym.Gram_AltApp (P.ParsecT e s m) where
189 option = P.option
190 optional = P.optional
191 many = P.many
192 some = P.some
193 skipMany = P.skipMany
194 instance ParsecC e s => Sym.Gram_CF (P.ParsecT e s m) where
195 CF f <& Reg p = CF $ P.lookAhead f <*> p
196 Reg f &> CF p = CF $ P.lookAhead f <*> p
197 minus (CF f) (Reg p) = CF $ P.notFollowedBy (P.try p) *> f
198 instance ParsecC e s => Sym.Gram_Comment (P.ParsecT e s m)
199 instance ParsecC e s => Sym.Gram_Op (P.ParsecT e s m)
200 instance ParsecC e s => Sym.Gram_Name (P.ParsecT e s m)
201 instance -- Sym.Gram_Type
202 ( ParsecC e s
203 , Gram_Source src (P.ParsecT e s m)
204 ) => Sym.Gram_Type src (P.ParsecT e s m)
205 instance -- Sym.Gram_Term_Type
206 ( ParsecC e s
207 , Gram_Source src (P.ParsecT e s m)
208 ) => Sym.Gram_Term_Type src (P.ParsecT e s m)
209 instance -- Sym.Gram_Term
210 ( ParsecC e s
211 -- , Show src
212 , MC.MonadState (Sym.Imports, Sym.Modules src ss) (P.ParsecT e s m)
213 , Sym.Gram_Source src (P.ParsecT e s m)
214 , Sym.Gram_Term_Atoms src ss (P.ParsecT e s m)
215 ) => Sym.Gram_Term src ss (P.ParsecT e s m)
216
217 --
218 -- LCC instances
219 --
220 instance ParsecC e s => LCC.Gram_Count (P.ParsecT e s m)
221 instance ParsecC e s => LCC.Gram_Char (P.ParsecT e s m)
222 instance ParsecC e s => LCC.Gram_Comment (P.ParsecT e s m)
223 instance ParsecC e s => LCC.Gram_Tag (P.ParsecT e s m)
224 instance ParsecC e s => LCC.Gram_Account (P.ParsecT e s m)
225 instance ParsecC e s => LCC.Gram_Amount (P.ParsecT e s m)
226 instance ParsecC e s => LCC.Gram_File (P.ParsecT e s m)
227 instance -- LCC.Gram_Date
228 ( ParsecC e s
229 , MC.MonadState Year (P.ParsecT e s m)
230 ) => Gram_Date (P.ParsecT e s m)
231 instance -- LCC.Gram_Posting
232 ( ParsecC e s
233 , MC.MonadState Chart (P.ParsecT e s m)
234 , MC.MonadState Style_Amounts (P.ParsecT e s m)
235 , MC.MonadState (S.Maybe Unit) (P.ParsecT e s m)
236 ) => Gram_Posting (P.ParsecT e s m)
237 instance -- LCC.Gram_Transaction
238 ( ParsecC e s
239 , MC.MonadState (S.Maybe Unit) (P.ParsecT e s m)
240 , MC.MonadState Chart (P.ParsecT e s m)
241 , MC.MonadState Section (P.ParsecT e s m)
242 , MC.MonadState Style_Amounts (P.ParsecT e s m)
243 , MC.MonadState Year (P.ParsecT e s m)
244 ) => Gram_Transaction (P.ParsecT e s m)
245 instance -- LCC.Gram_Chart
246 ( ParsecC e s
247 , MC.MonadState Chart (P.ParsecT e s m)
248 , MC.MonadState Section (P.ParsecT e s m)
249 ) => Gram_Chart (P.ParsecT e s m)
250 instance -- LCC.Gram_Path
251 ( ParsecC e s
252 , MonadIO m
253 ) => Gram_Path (P.ParsecT e s m) where
254 g_canonfile g = do
255 pf@(PathFile fp) <- g
256 liftIO $ (pf,) <$> Exn.catch
257 (Right . CanonFile . PathFile <$> IO.canonicalizePath fp)
258 (return . Left)
259 instance -- LCC.Gram_IO
260 ( ParsecC e s
261 , MonadIO m
262 , MC.MonadState (S.Maybe Unit) (P.ParsecT e s m)
263 , MC.MonadState Chart (P.ParsecT e s m)
264 , MC.MonadState Style_Amounts (P.ParsecT e s m)
265 , MC.MonadState Year (P.ParsecT e s m)
266 , Sym.Gram_Source src (P.ParsecT e s m)
267 , P.MonadParsec e Text (P.ParsecT e s m)
268 ) => Gram_IO src (P.ParsecT e s m) where
269 g_read g_path g = do
270 lr <- g_source $ do
271 lr_path <- g_path
272 case lr_path of
273 S.Left (e::Error_Compta src) ->
274 return $ \(src::src) ->
275 S.Left $ At src e
276 S.Right (PathFile fp) ->
277 liftIO $ Exn.catch
278 ((\inp _src -> S.Right $ (fp,) $ Enc.decodeUtf8 inp) <$> BS.readFile fp)
279 (\exn -> return $ \src -> S.Left $ At src $ Error_Compta_Read (PathFile fp) exn)
280 case lr of
281 S.Left e -> do
282 return $ S.Left [e]
283 S.Right (fp_new, s_new) -> do
284 P.pushPosition $ P.initialPos fp_new
285 s_old <- P.getInput; P.setInput s_new
286
287 lr_a <- g
288 {-
289 P.observing g >>= \case
290 Left err -> do
291 MC.put jf_old
292 P.setInput s_old
293 P.popPosition
294 P.updateParserState (\st -> st{P.statePos=P.errorPos $ trace ("ERROR: " <> show err) err})
295 P.failure
296 (P.errorUnexpected err)
297 (P.errorExpected err)
298 (P.errorCustom err)
299 Right a -> return a
300 -}
301
302 P.setInput s_old
303 P.popPosition
304
305 return lr_a
306 instance -- LCC.Gram_Compta
307 ( ParsecC e s
308 , MonadIO m
309 , MC.MonadState (Sym.Modules src ss) m
310 , MC.MonadState (Sym.Name2Type src) m
311 , MC.MonadState (Sym.Imports, Sym.Modules src ss) m
312 , MC.MonadState Terms m
313 , Gram_Input (P.ParsecT e s m)
314 , Monoid j
315 -- , Show j
316 -- , Show src
317 , Inj_Source (Sym.AST_Type src) src
318 , Inj_Source (Sym.KindK src) src
319 , Inj_Source (Sym.TypeVT src) src
320 , P.MonadParsec e Text (P.ParsecT e s m)
321 , Sym.Gram_Source src (P.ParsecT e s m)
322 , Sym.Gram_Term_Atoms src ss (P.ParsecT e s m)
323 , MC.MonadReader (S.Either Exn.IOException CanonFile) (P.ParsecT e s m)
324 , MC.MonadState (Context_Read src j) (P.ParsecT e s m)
325 , MC.MonadState (Journal j) (P.ParsecT e s m)
326 , MC.MonadState (Journals j) (P.ParsecT e s m)
327 , MC.MonadState (S.Maybe Unit) (P.ParsecT e s m)
328 , MC.MonadState Chart (P.ParsecT e s m)
329 , MC.MonadState Section (P.ParsecT e s m)
330 , MC.MonadState Style_Amounts (P.ParsecT e s m)
331 , MC.MonadState Year (P.ParsecT e s m)
332 ) => Gram_Compta ss src j (P.ParsecT e s m)
333 instance -- LCC.Gram_Term_Def
334 ( ParsecC e s
335 -- , MC.MonadState (Env src ss) m
336 , MC.MonadState (Sym.Name2Type src) m
337 , MC.MonadState (Sym.Imports, Sym.Modules src ss) m
338 , Monad m
339 -- , Show src
340 , Inj_Source (Sym.AST_Type src) src
341 , Inj_Source (Sym.KindK src) src
342 , Inj_Source (Sym.TypeVT src) src
343 , Gram_Source src (P.ParsecT e s m)
344 , P.MonadParsec e Text (P.ParsecT e s m)
345 , Sym.Gram_Term_Atoms src ss (P.ParsecT e s m)
346 ) => LCC.Gram_Term_Def src ss (P.ParsecT e s m)
347
348 instance ParsecC e Text => Gram_Input (P.ParsecT e Text m) where
349 g_input g = do
350 P.State {P.stateInput=ib, P.statePos=P.SourcePos _ lb bc:|_} <- P.getParserState
351 f <- g
352 P.State {P.statePos=P.SourcePos _ le ce:|_} <- P.getParserState
353 return $ f $ (`Text.take` ib) $ sizeInput 0 ib
354 (P.unPos le - P.unPos lb)
355 (if lb == le then P.unPos ce - P.unPos bc else pred (P.unPos ce))
356 where
357 sizeInput :: Int -> Text -> Word -> Word -> Int
358 sizeInput s _i 0 0 = s
359 sizeInput s i 0 c =
360 case Text.uncons i of
361 Nothing -> error "[BUG] sizeInput"
362 Just (_c, i') -> sizeInput (succ s) i' 0 (pred c)
363 sizeInput s i l c =
364 case Text.uncons i of
365 Nothing -> error "[BUG] sizeInput"
366 Just ('\n', i') -> sizeInput (succ s) i' (pred l) c
367 Just (_c, i') -> sizeInput (succ s) i' l c
368
369 -- syntaxError :: P.ParseError Char P.Dec -> Text
370 -- syntaxError
371
372 parseErrorPretty ::
373 ( Ord t
374 , P.ShowToken t
375 , P.ShowErrorComponent e )
376 => P.ParseError t e
377 -> String
378 parseErrorPretty e =
379 sourcePosStackPretty (P.errorPos e) ++ ":\n"
380 ++ parseErrorTextPretty e
381
382 -- | Pretty-print stack of source positions.
383 sourcePosStackPretty :: NonEmpty P.SourcePos -> String
384 sourcePosStackPretty ms = concatMap f rest ++ P.sourcePosPretty pos
385 where
386 (pos :| rest') = ms
387 rest = List.reverse rest'
388 f p = "in file included from " ++ P.sourcePosPretty p ++ ",\n"
389
390 showParseError ::
391 ( Ord t
392 , P.ShowToken t
393 , P.ShowErrorComponent e
394 , D.Doc_Text d
395 , D.Doc_Color d
396 , D.Doc_Decoration d
397 ) => P.ParseError t e -> IO d
398 showParseError err = do
399 let (pos:|_) = P.errorPos err
400 q <- d_sourcepos $ sourcePos pos
401 return $ D.catV
402 [ D.stringH (sourcePosStackPretty $ P.errorPos err) <> ":"
403 , D.stringH $ parseErrorTextPretty err
404 , q
405 ]
406
407 -- | Transforms list of error messages into their textual representation.
408 messageItemsPretty :: P.ShowErrorComponent a => String -> Set a -> String
409 messageItemsPretty prefix ts
410 | Set.null ts = ""
411 | otherwise = prefix ++ f ts ++ "\n"
412 where f = orList . NE.fromList . Set.toAscList . Set.map P.showErrorComponent
413
414 orList :: NonEmpty String -> String
415 orList (x:|[]) = x
416 orList (x:|[y]) = x ++ " or " ++ y
417 orList xs = List.intercalate ", " (NE.init xs) ++ ", or " ++ NE.last xs
418
419 -- | Pretty-print textual part of a 'ParseError', that is, everything except
420 -- stack of source positions. The rendered staring always ends with a new line.
421 parseErrorTextPretty ::
422 ( Ord t
423 , P.ShowToken t
424 , P.ShowErrorComponent e )
425 => P.ParseError t e
426 -> String
427 parseErrorTextPretty (P.ParseError _ us ps xs) =
428 if Set.null us && Set.null ps && Set.null xs
429 then "unknown parse error\n"
430 else concat
431 [ messageItemsPretty "unexpected " us
432 , messageItemsPretty "expecting " ps
433 , List.unlines (P.showErrorComponent <$> Set.toAscList xs)
434 ]