module Language.TCT.Read ( module Language.TCT.Read.Tree , module Language.TCT.Read.Token , module Language.TCT.Read.Cell , module Language.TCT.Read ) where import Control.Applicative (Applicative(..)) import Control.Arrow (left) import Control.Monad (Monad(..), join) import Data.Bool import Data.Either (Either(..)) import Data.Eq (Eq) import Data.Function (($), (.)) import Data.Functor ((<$>)) import Data.Ord (Ord(..)) import Data.Set (Set) import Data.Traversable (Traversable(..)) import Data.TreeSeq.Strict (Tree(..), Trees) import Data.Void (Void) import System.FilePath (()) import System.IO (FilePath, IO) import Text.Show (Show(..), showParen, showString, showChar) import qualified Data.ByteString.Lazy as BSL import qualified Data.Set as Set import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL import qualified System.FilePath as FilePath import qualified Text.Megaparsec as P import Language.TCT.Debug import Language.TCT.Tree import Language.TCT.Cell import Language.TCT.Read.Cell import Language.TCT.Read.Tree import Language.TCT.Read.Token -- | Parsing is done in two phases: -- -- 1. indentation-sensitive parsing on 'TL.Text' -- 2. Pair-sensitive parsing on some 'NodeText's resulting of 1. readTCT :: FilePath -> TL.Text -> Either ErrorRead (Trees (Cell Node)) readTCT inp txt = do trs <- left ErrorReadParser $ P.runParser (p_Trees <* P.eof) inp txt join <$> traverse (go $ NodeHeader HeaderDash) (debug0 "readTCT" trs) where go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node)) go parent t@(Tree c@(Cell bn en nod) ts) = case nod of NodeLower{} -> Right $ pure t -- NOTE: preserve NodeText "" NodeText n | TL.null n -> Right $ pure t NodeText n -> case parent of NodeHeader HeaderBar{} -> Right $ pure t NodeHeader HeaderEqual{} -> Right $ pure t NodeHeader HeaderDashDash{} -> Right $ pure t _ -> left ErrorReadParser $ parseTokens <$> parseLexemes inp (Cell bn en n) _ -> pure . Tree c . join <$> traverse (go nod') ts where -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText' nod' = case nod of NodePara -> parent _ -> nod readFile :: FilePath -> IO TL.Text readFile fp = TL.decodeUtf8 <$> BSL.readFile fp readTCTrec :: FilePath -> IO (Either ErrorRead (Trees (Cell Node))) readTCTrec = goFile Set.empty (NodeHeader HeaderDash) where goFile :: Set FilePath -> Node -> FilePath -> IO (Either ErrorRead (Trees (Cell Node))) goFile inputFiles parentNode inpFile | Set.member inpFileNorm inputFiles = return $ Left $ ErrorReadIncludeLoop inpFileNorm | otherwise = do inpText <- readFile inpFileNorm case P.runParser (p_Trees <* P.eof) inpFileNorm inpText of Left err -> return $ Left $ ErrorReadParser err Right trees -> (join <$>) . sequence <$> traverse (goTree (Set.insert inpFileNorm inputFiles) parentNode) (debug0 "readTCTrec" trees) where inpFileNorm = FilePath.normalise inpFile goTree :: Set FilePath -> Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node))) goTree inpFiles parNode t@(Tree c@(Cell bn en nod) ts) = case nod of NodeLower{} -> return $ Right $ pure t -- NOTE: preserve NodeText "" NodeText n | TL.null n -> return $ Right $ pure t NodeText n -> case parNode of NodeHeader HeaderBar{} -> return $ Right $ pure t NodeHeader HeaderEqual{} -> return $ Right $ pure t NodeHeader HeaderDashDash{} -> return $ Right $ pure t _ -> return $ left ErrorReadParser $ parseTokens <$> parseLexemes inpFileNorm (Cell bn en n) NodeHeader (HeaderDotSlash incFile) -> goFile inpFiles parNode $ FilePath.takeDirectory inpFileNorm incFile _ -> (pure . Tree c . join <$>) . sequence <$> traverse (goTree inpFiles nod') ts where -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText' nod' = case nod of NodePara -> parNode _ -> nod -- * Type 'ErrorRead' data ErrorRead = ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void)) | ErrorReadIncludeLoop FilePath deriving (Eq) instance Show ErrorRead where showsPrec p = \case ErrorReadParser e -> showParen (p >= 10) $ showString (P.parseErrorPretty e) ErrorReadIncludeLoop file -> showString "ErrorReadIncludeLoop" . showChar ' ' . showsPrec 10 file