module Hdoc.TCT.Read ( module Hdoc.TCT.Read.Cell , module Hdoc.TCT.Read.Elem , module Hdoc.TCT.Read.Token , module Hdoc.TCT.Read.Tree , module Hdoc.TCT.Read ) where import Control.Applicative (Applicative(..)) import Control.Arrow (left) import Control.Monad (Monad(..), join, (=<<)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), any) import Data.Function (($), (.)) import Data.Functor ((<$>)) import Data.List.NonEmpty (NonEmpty(..)) import Data.Semigroup (Semigroup(..)) import Data.Traversable (Traversable(..)) import Data.TreeSeq.Strict (Tree(..), Trees) import Data.Void (Void) import System.FilePath ((), takeDirectory) import System.IO (FilePath, IO) import Text.Show (Show(..), showString) import qualified Control.Monad.Trans.Reader as R import qualified Data.ByteString.Lazy as BSL import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL import qualified System.Directory as Directory import qualified System.FilePath as FilePath import qualified System.IO.Error as IO import qualified Text.Megaparsec as P import Hdoc.TCT.Debug import Hdoc.TCT.Tree import Hdoc.TCT.Cell import Hdoc.TCT.Read.Cell import Hdoc.TCT.Read.Elem import Hdoc.TCT.Read.Tree import Hdoc.TCT.Read.Token -- | Parsing is done in two phases: -- -- 1. Indentation-sensitive parsing on 'TL.Text'. -- 2. 'Pair'-sensitive parsing on some 'NodeText's resulting of 1. . readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node))) readTCT inp = do inpFileCanon <- Directory.makeRelativeToCurrentDirectory =<< Directory.canonicalizePath inp goFile (NodeHeader HeaderDash) $ FileRange inpFileCanon def def:|[] where goFile :: Node -> NonEmpty (FileRange LineColumn) -> IO (Either ErrorRead (Trees (Cell Node))) goFile parentNode loc@(FileRange{fileRange_file=inpFile}:|inpPath) | any (\FileRange{fileRange_file} -> fileRange_file == inpFile) inpPath = return $ Left $ ErrorReadIncludeLoop loc | otherwise = do readFile inpFile >>= \case Left err -> return $ Left $ ErrorReadIO loc err Right inpText -> case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of Left err -> return $ Left $ ErrorReadParser err Right trees -> (join <$>) . sequence <$> traverse (goTree parentNode) trees -- DEBUG: (debug0 "readTCT" trees) goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node))) goTree parNode t@(Tree c@(Sourced ss@(FileRange{fileRange_file}:|_sn) nod) ts) = case nod of NodeLower{} -> return $ Right $ pure t -- NOTE: preserve NodeText "" NodeText n | TL.null n -> return $ Right $ pure t NodeText n -> case parNode of NodeHeader HeaderBar{} -> return $ Right $ pure t NodeHeader HeaderEqual{} -> return $ Right $ pure t NodeHeader HeaderDashDash{} -> return $ Right $ pure t _ -> return $ left ErrorReadParserCell $ parseTokens <$> parseLexemes (Sourced ss n) NodeHeader (HeaderDotSlash incFile) -> do incFileCanon <- Directory.makeRelativeToCurrentDirectory =<< Directory.canonicalizePath (FilePath.takeDirectory fileRange_file incFile) ((pure . Tree c <$>) <$>) $ -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl -- to merge nodes accross files, when writing XML goFile parNode $ FileRange incFileCanon def def :| toList ss _ -> (pure . Tree c . join <$>) . sequence <$> traverse (goTree nod') ts where -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText' nod' = case nod of NodePara -> parNode _ -> nod readFile :: FilePath -> IO (Either IO.IOError TL.Text) readFile fp = (Right . TL.decodeUtf8 <$> BSL.readFile fp) `IO.catchIOError` \e -> if IO.isAlreadyInUseError e || IO.isDoesNotExistError e || IO.isPermissionError e then return $ Left e else IO.ioError e -- | Useful when producing only an exact source file rendition. readTCTWithoutIncludes :: FilePath -> TL.Text -> Either ErrorRead (Trees (Cell Node)) readTCTWithoutIncludes inp txt = do trees <- left ErrorReadParser $ (`R.runReader` []) $ P.runParserT (p_Trees <* P.eof) inp txt join <$> traverse (go $ NodeHeader HeaderDash) trees -- (debug0 "readTCTWithoutIncludes" trees) where go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node)) go parent t@(Tree c@(Sourced ssn nod) ts) = case nod of NodeLower{} -> Right $ pure t -- NOTE: preserve NodeText "" NodeText n | TL.null n -> Right $ pure t NodeText n -> case parent of NodeHeader HeaderBar{} -> Right $ pure t NodeHeader HeaderEqual{} -> Right $ pure t NodeHeader HeaderDashDash{} -> Right $ pure t _ -> left ErrorReadParserCell $ parseTokens <$> parseLexemes (Sourced ssn n) _ -> pure . Tree c . join <$> traverse (go nod') ts where -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText' nod' = case nod of NodePara -> parent _ -> nod dependencies :: Trees (Cell Node) -> [FilePath] dependencies = foldr (go "") [] where go :: FilePath -> Tree (Cell Node) -> [FilePath] -> [FilePath] go dir (Tree (Sourced _ss n) ts) acc = case n of NodeHeader (HeaderDotSlash file) -> (dirfile) : foldr (go (dirtakeDirectory file)) acc ts _ -> foldr (go dir) acc ts -- * Type 'ErrorRead' data ErrorRead = ErrorReadParser (P.ParseErrorBundle TL.Text Void) | ErrorReadParserCell (P.ParseErrorBundle StreamCell Void) | ErrorReadIncludeLoop Location | ErrorReadIO Location IO.IOError deriving (Eq) instance Show ErrorRead where showsPrec _p = \case ErrorReadParser e -> showString (P.errorBundlePretty e) ErrorReadParserCell e -> showString (P.errorBundlePretty e) ErrorReadIncludeLoop (FileRange{..}:|loc) -> showString "ErrorReadIncludeLoop" . showString "\n " . showString fileRange_file . showString (foldMap (\s -> "\n included by "<>show s) loc) ErrorReadIO (_:|loc) err -> showString "ErrorReadIO" . showString "\n " . showsPrec 10 err . showString (foldMap (\s -> "\n in "<>show s) loc)