{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE TypeFamilies #-} module Language.TCT.Read ( module Language.TCT.Read.Tree , module Language.TCT.Read.Token , module Language.TCT.Read.Cell , module Language.TCT.Read ) where import Control.Applicative (Applicative(..)) import Data.Char (Char) import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Function (($)) import Data.Functor ((<$>)) import Data.Maybe (Maybe(..)) import Data.Ord (Ord(..)) import Data.Proxy (Proxy(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq) import Data.String (IsString) import Data.Text (Text) import Data.Traversable (Traversable(..)) import Data.TreeSeq.Strict (Tree) import Data.Tuple (snd) import Data.Void (Void) import System.IO (FilePath) import qualified Data.Text as Text import qualified Data.TreeSeq.Strict as TreeSeq import qualified Text.Megaparsec as P import Language.TCT.Tree import Language.TCT.Token import Language.TCT.Cell import Language.TCT.Read.Cell import Language.TCT.Read.Tree import Language.TCT.Read.Token -- * Type 'TCT' type TCT = Tree (Cell Key) Tokens -- * Type 'TCTs' type TCTs = Seq TCT readTCTs :: FilePath -> Text -> Either (P.ParseError (P.Token Text) (P.ErrorFancy Void)) TCTs readTCTs inp txt = do tct <- P.runParser (p_Trees <* P.eof) inp txt (`traverse` tct) $ \tr -> sequence $ (`TreeSeq.mapWithKey`tr) $ \key c@(Cell pos _posEnd t) -> case key of -- Verbatim Keys Just (unCell -> KeyBar{}) -> Right $ tokens [TokenPlain <$> c] Just (unCell -> KeyLower{}) -> Right $ tokens [TokenPlain <$> c] Just (unCell -> KeyEqual{}) -> Right $ tokens [TokenPlain <$> c] -- Token Keys _ -> snd $ P.runParser' (p_Tokens <* P.eof) P.State { P.stateInput = StreamCell t , P.statePos = pure $ P.SourcePos inp (P.mkPos $ linePos pos) (P.mkPos $ columnPos pos) , P.stateTabWidth = P.mkPos $ columnPos pos , P.stateTokensProcessed = 0 } -- * Type 'StreamCell' -- | Wrap 'Text' to have a 'P.Stream' instance -- whose 'P.advance1' method abuses the tab width state -- to instead pass the line indent. -- This in order to report correct 'P.SourcePos' -- when parsing a 'Cell' containing newlines. newtype StreamCell = StreamCell { unStreamCell :: Text } deriving (IsString,Eq,Ord) instance P.Stream StreamCell where type Token StreamCell = Char type Tokens StreamCell = StreamCell take1_ (StreamCell t) = (StreamCell <$>) <$> P.take1_ t takeN_ n (StreamCell t) = (\(ts,s) -> (StreamCell ts, StreamCell s)) <$> P.takeN_ n t takeWhile_ f (StreamCell t) = (\(ts,s) -> (StreamCell ts, StreamCell s)) $ P.takeWhile_ f t tokensToChunk _s ts = StreamCell (P.tokensToChunk (Proxy::Proxy Text) ts) chunkToTokens _s (StreamCell ch) = P.chunkToTokens (Proxy::Proxy Text) ch chunkLength _s (StreamCell ch) = P.chunkLength (Proxy::Proxy Text) ch advance1 _s = advance1 advanceN _s indent pos (StreamCell t) = Text.foldl' (advance1 indent) pos t advance1 :: P.Pos -> P.SourcePos -> Char -> P.SourcePos advance1 indent (P.SourcePos n line col) c = case c of '\n' -> P.SourcePos n (line <> P.pos1) indent _ -> P.SourcePos n line (col <> P.pos1)