{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE Rank2Types #-} {-# LANGUAGE TypeFamilies #-} module Textphile.TCT.Read.Cell where import Control.Applicative (Applicative(..)) import Control.Monad (Monad(..)) import Control.Monad.Trans.Class (MonadTrans(..)) import Data.Char (Char) import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Function (($), (.), id) import Data.Functor ((<$>)) import Data.Int (Int) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..)) import Data.Ord (Ord) import Data.Proxy (Proxy(..)) import Data.Semigroup (Semigroup(..)) import Data.String (IsString) import Data.Tuple (fst, snd) import Text.Show (ShowS) import Prelude (max, rem, (+), (-), fromIntegral) import qualified Control.Monad.Trans.Reader as R import qualified Data.Set as Set import qualified Data.Text.Lazy as TL import qualified Text.Megaparsec as P import Textphile.TCT.Cell -- * Type 'Parser' -- | Convenient alias. type Parser e s a = Parsable e s a => P.ParsecT e s (R.Reader [FileRange LineColumn]) a -- ** Type 'Offset' type Offset = Int -- ** Type 'Parsable' type Parsable e s a = ( P.Stream s , P.Token s ~ Char , Ord e , IsString (P.Tokens s) , P.ShowErrorComponent e ) -- | Like 'P.satisfy' but with a predicate returning 'Maybe' instead of 'Bool'. p_satisfyMaybe :: P.MonadParsec e s m => (P.Token s -> Maybe a) -> m a p_satisfyMaybe = (`P.token` Set.empty) p_LineColumn :: Parser e s LineColumn p_LineColumn = do P.SourcePos{..} <- P.getSourcePos return $ LineColumn sourceLine sourceColumn p_Cell :: Parser e s a -> Parser e s (Cell a) p_Cell pa = do path <- lift R.ask b <- P.getSourcePos a <- pa e <- P.getSourcePos let fileRange_begin = LineColumn (P.sourceLine b) (P.sourceColumn b) let fileRange_end = LineColumn (P.sourceLine e) (P.sourceColumn e) let fileRange = FileRange {fileRange_file=P.sourceName b, fileRange_begin, fileRange_end} return $ Sourced (fileRange:|path) a -- | Wrapper around |P.runParserT'| -- to use given 'Sourced' as starting position. runParserOnCell :: Parsable e StreamCell a => Parser e StreamCell a -> Cell TL.Text -> Either (P.ParseErrorBundle StreamCell e) a runParserOnCell p (Sourced (FileRange inp bp _ep :| path) s) = snd $ (`R.runReader` path) $ P.runParserT' (p <* P.eof) P.State { P.stateInput = StreamCell s , P.stateOffset = 0 , P.statePosState = P.PosState { P.pstateInput = StreamCell s , P.pstateOffset = 0 , P.pstateSourcePos = P.SourcePos inp (lineNum bp) (colNum bp) , P.pstateTabWidth = colNum bp , P.pstateLinePrefix = "" } } -- * Type 'StreamCell' -- | Wrap 'TL.Text' to have a 'P.Stream' instance -- whose 'P.advance1' method abuses the tab width state -- to instead pass the line indent. -- This in order to report correct 'P.SourcePos' -- when parsing a 'Cell' containing newlines. newtype StreamCell = StreamCell { unStreamCell :: TL.Text } deriving (IsString,Eq,Ord) {- instance P.Stream TL.Text where type Token TL.Text = Char type Tokens TL.Text = TL.Text tokenToChunk _s = TL.singleton tokensToChunk _s = TL.pack chunkToTokens _s = TL.unpack chunkLength _s = fromIntegral . TL.length chunkEmpty _s = TL.null take1_ = TL.uncons takeN_ n s | n <= 0 = Just (TL.empty, s) | TL.null s = Nothing | otherwise = Just (TL.splitAt (fromIntegral n) s) takeWhile_ = TL.span showTokens _s = stringPretty -} data St = St P.SourcePos ShowS instance P.Stream StreamCell where type Token StreamCell = Char type Tokens StreamCell = TL.Text take1_ (StreamCell t) = (StreamCell <$>) <$> P.take1_ t takeN_ n (StreamCell t) = (StreamCell <$>) <$> P.takeN_ n t takeWhile_ f (StreamCell t) = StreamCell <$> P.takeWhile_ f t tokensToChunk _s = TL.pack chunkToTokens _s = TL.unpack chunkLength _s = fromIntegral . TL.length reachOffset o P.PosState {..} = ( spos , case {- expandTab pstateTabWidth . -} addPrefix . f . fromToks . fst $ P.takeWhile_ (/= '\n') post of "" -> "" xs -> xs , P.PosState { pstateInput = StreamCell post , pstateOffset = max pstateOffset o , pstateSourcePos = spos , pstateTabWidth = pstateTabWidth , pstateLinePrefix = if sameLine -- NOTE We don't use difference lists here because it's -- desirable for 'P.PosState' to be an instance of 'Eq' and -- 'Show'. So we just do appending here. Fortunately several -- parse errors on the same line should be relatively rare. then pstateLinePrefix <> f "" else f "" } ) where addPrefix xs = if sameLine then pstateLinePrefix <> xs else xs sameLine = P.sourceLine spos == P.sourceLine pstateSourcePos (pre, post) = TL.splitAt (fromIntegral $ o - pstateOffset) $ unStreamCell pstateInput St spos f = TL.foldl' go (St pstateSourcePos id) pre fromToks = TL.unpack fromTok = id w = 4 go (St (P.SourcePos n l c) g) = \case '\n' -> St (P.SourcePos n (l <> P.pos1) pstateTabWidth) id ch@'\t' -> St (P.SourcePos n l (P.mkPos $ P.unPos c + w - ((P.unPos c - 1) `rem` w))) (g . (fromTok ch :)) ch -> St (P.SourcePos n l (c <> P.pos1)) (g . (fromTok ch :)) reachOffsetNoLine o P.PosState{..} = ( spos , P.PosState { pstateInput = StreamCell post , pstateOffset = max pstateOffset o , pstateSourcePos = spos , pstateTabWidth = pstateTabWidth , pstateLinePrefix = pstateLinePrefix } ) where spos = TL.foldl' go pstateSourcePos pre (pre, post) = TL.splitAt (fromIntegral $ o - pstateOffset) $ unStreamCell pstateInput w = 4 go (P.SourcePos n l c) = \case '\n' -> P.SourcePos n (l <> P.pos1) pstateTabWidth '\t' -> P.SourcePos n l $ P.mkPos $ P.unPos c + w - ((P.unPos c - 1) `rem` w) _ -> P.SourcePos n l (c <> P.pos1) showTokens _s = P.showTokens (Proxy::Proxy TL.Text)