module Language.TCT.Read ( module Language.TCT.Read.Cell , module Language.TCT.Read.Elem , module Language.TCT.Read.Token , module Language.TCT.Read.Tree , module Language.TCT.Read ) where import Control.Applicative (Applicative(..)) import Control.Arrow (left) import Control.Monad (Monad(..), join, (=<<)) import Data.Bool import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), any) import Data.Function (($), (.)) import Data.Functor ((<$>)) import Data.List.NonEmpty (NonEmpty(..)) import Data.Semigroup (Semigroup(..)) import Data.Traversable (Traversable(..)) import Data.TreeSeq.Strict (Tree(..), Trees) import Data.Void (Void) import System.FilePath (()) import System.IO (FilePath, IO) import Text.Show (Show(..), showString) import qualified Control.Monad.Trans.Reader as R import qualified Data.ByteString.Lazy as BSL import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL import qualified System.Directory as Directory import qualified System.FilePath as FilePath import qualified Text.Megaparsec as P import Language.TCT.Debug import Language.TCT.Tree import Language.TCT.Cell import Language.TCT.Read.Cell import Language.TCT.Read.Elem import Language.TCT.Read.Tree import Language.TCT.Read.Token -- | Parsing is done in two phases: -- -- 1. indentation-sensitive parsing on 'TL.Text' -- 2. Pair-sensitive parsing on some 'NodeText's resulting of 1. readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node))) readTCT inp = do inpFileCanon <- Directory.makeRelativeToCurrentDirectory =<< Directory.canonicalizePath inp goFile (NodeHeader HeaderDash) $ Span inpFileCanon pos1 pos1:|[] where goFile :: Node -> NonEmpty Span -> IO (Either ErrorRead (Trees (Cell Node))) goFile parentNode spans@(Span{span_file=inpFile}:|inpPath) | any (\Span{span_file} -> span_file == inpFile) inpPath = return $ Left $ ErrorReadIncludeLoop spans | otherwise = do inpText <- readFile inpFile case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of Left err -> return $ Left $ ErrorReadParser err Right trees -> (join <$>) . sequence <$> traverse (goTree parentNode) (debug0 "readTCTWithIncludes" trees) goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node))) goTree parNode t@(Tree c@(Cell ss@(Span{span_file}:|_sn) nod) ts) = case nod of NodeLower{} -> return $ Right $ pure t -- NOTE: preserve NodeText "" NodeText n | TL.null n -> return $ Right $ pure t NodeText n -> case parNode of NodeHeader HeaderBar{} -> return $ Right $ pure t NodeHeader HeaderEqual{} -> return $ Right $ pure t NodeHeader HeaderDashDash{} -> return $ Right $ pure t _ -> return $ left ErrorReadParser $ parseTokens <$> parseLexemes (Cell ss n) NodeHeader (HeaderDotSlash incFile) -> do incFileCanon <- Directory.makeRelativeToCurrentDirectory =<< Directory.canonicalizePath (FilePath.takeDirectory span_file incFile) ((pure . Tree c <$>) <$>) $ -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl -- to merge nodes accross files, when writing XML goFile parNode $ Span incFileCanon pos1 pos1 :| toList ss _ -> (pure . Tree c . join <$>) . sequence <$> traverse (goTree nod') ts where -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText' nod' = case nod of NodePara -> parNode _ -> nod readFile :: FilePath -> IO TL.Text readFile fp = TL.decodeUtf8 <$> BSL.readFile fp -- | Useful when producing only an exact source file rendition. readTCTWithoutIncludes :: FilePath -> TL.Text -> Either ErrorRead (Trees (Cell Node)) readTCTWithoutIncludes inp txt = do trs <- left ErrorReadParser $ (`R.runReader` []) $ P.runParserT (p_Trees <* P.eof) inp txt join <$> traverse (go $ NodeHeader HeaderDash) (debug0 "readTCT" trs) where go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node)) go parent t@(Tree c@(Cell ssn nod) ts) = case nod of NodeLower{} -> Right $ pure t -- NOTE: preserve NodeText "" NodeText n | TL.null n -> Right $ pure t NodeText n -> case parent of NodeHeader HeaderBar{} -> Right $ pure t NodeHeader HeaderEqual{} -> Right $ pure t NodeHeader HeaderDashDash{} -> Right $ pure t _ -> left ErrorReadParser $ parseTokens <$> parseLexemes (Cell ssn n) _ -> pure . Tree c . join <$> traverse (go nod') ts where -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText' nod' = case nod of NodePara -> parent _ -> nod -- * Type 'ErrorRead' data ErrorRead = ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void)) | ErrorReadIncludeLoop Spans deriving (Eq) instance Show ErrorRead where showsPrec _p = \case ErrorReadParser e -> showString (P.parseErrorPretty e) ErrorReadIncludeLoop (Span{..}:|spans) -> showString "ErrorReadIncludeLoop" . showString "\n " . showString span_file . showString (foldMap (\s -> "\n included by "<>show s) spans)