module Hdoc.TCT.Read ( module Hdoc.TCT.Read.Cell , module Hdoc.TCT.Read.Elem , module Hdoc.TCT.Read.Token , module Hdoc.TCT.Read.Tree , module Hdoc.TCT.Read ) where import Control.Applicative (Applicative(..)) import Control.Arrow (left) import Control.Monad (Monad(..), join, (=<<)) import Data.Bool import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), any) import Data.Function (($), (.)) import Data.Functor ((<$>)) import Data.Functor.Compose (Compose(..)) import Data.List.NonEmpty (NonEmpty(..)) import Data.Semigroup (Semigroup(..)) import Data.Traversable (Traversable(..)) import Data.TreeSeq.Strict (Tree(..), Trees) import Data.Void (Void) import System.FilePath ((), takeDirectory) import System.IO (FilePath, IO) import Text.Show (Show(..), showString) import qualified Control.Monad.Trans.Reader as R import qualified Data.ByteString.Lazy as BSL import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL import qualified System.Directory as Directory import qualified System.FilePath as FilePath import qualified System.IO.Error as IO import qualified Text.Megaparsec as P import Hdoc.TCT.Debug import Hdoc.TCT.Tree import Hdoc.TCT.Cell import Hdoc.TCT.Read.Cell import Hdoc.TCT.Read.Elem import Hdoc.TCT.Read.Tree import Hdoc.TCT.Read.Token -- | Parsing is done in two phases: -- -- 1. Indentation-sensitive parsing on 'TL.Text'. -- 2. 'Pair'-sensitive parsing on some 'NodeText's resulting of 1. . readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node))) readTCT inp = do inpFileCanon <- Directory.makeRelativeToCurrentDirectory =<< Directory.canonicalizePath inp goFile (NodeHeader HeaderDash) $ Span inpFileCanon pos1 pos1:|[] where goFile :: Node -> NonEmpty Span -> IO (Either ErrorRead (Trees (Cell Node))) goFile parentNode loc@(Span{span_file=inpFile}:|inpPath) | any (\Span{span_file} -> span_file == inpFile) inpPath = return $ Left $ ErrorReadIncludeLoop loc | otherwise = do readFile inpFile >>= \case Left err -> return $ Left $ ErrorReadIO loc err Right inpText -> case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of Left err -> return $ Left $ ErrorReadParser err Right trees -> (join <$>) . sequence <$> traverse (goTree parentNode) trees -- DEBUG: (debug0 "readTCT" trees) goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node))) goTree parNode t@(Tree c@(Cell ss@(Span{span_file}:|_sn) nod) ts) = case nod of NodeLower{} -> return $ Right $ pure t -- NOTE: preserve NodeText "" NodeText n | TL.null n -> return $ Right $ pure t NodeText n -> case parNode of NodeHeader HeaderBar{} -> return $ Right $ pure t NodeHeader HeaderEqual{} -> return $ Right $ pure t NodeHeader HeaderDashDash{} -> return $ Right $ pure t _ -> return $ left ErrorReadParser $ parseTokens <$> parseLexemes (Cell ss n) NodeHeader (HeaderDotSlash incFile) -> do incFileCanon <- Directory.makeRelativeToCurrentDirectory =<< Directory.canonicalizePath (FilePath.takeDirectory span_file incFile) ((pure . Tree c <$>) <$>) $ -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl -- to merge nodes accross files, when writing XML goFile parNode $ Span incFileCanon pos1 pos1 :| toList ss _ -> (pure . Tree c . join <$>) . sequence <$> traverse (goTree nod') ts where -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText' nod' = case nod of NodePara -> parNode _ -> nod readFile :: FilePath -> IO (Either IO.IOError TL.Text) readFile fp = (Right . TL.decodeUtf8 <$> BSL.readFile fp) `IO.catchIOError` \e -> if IO.isAlreadyInUseError e || IO.isDoesNotExistError e || IO.isPermissionError e then return $ Left e else IO.ioError e -- | Useful when producing only an exact source file rendition. readTCTWithoutIncludes :: FilePath -> TL.Text -> Either ErrorRead (Trees (Cell Node)) readTCTWithoutIncludes inp txt = do trees <- left ErrorReadParser $ (`R.runReader` []) $ P.runParserT (p_Trees <* P.eof) inp txt join <$> traverse (go $ NodeHeader HeaderDash) trees -- (debug0 "readTCTWithoutIncludes" trees) where go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node)) go parent t@(Tree c@(Cell ssn nod) ts) = case nod of NodeLower{} -> Right $ pure t -- NOTE: preserve NodeText "" NodeText n | TL.null n -> Right $ pure t NodeText n -> case parent of NodeHeader HeaderBar{} -> Right $ pure t NodeHeader HeaderEqual{} -> Right $ pure t NodeHeader HeaderDashDash{} -> Right $ pure t _ -> left ErrorReadParser $ parseTokens <$> parseLexemes (Cell ssn n) _ -> pure . Tree c . join <$> traverse (go nod') ts where -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText' nod' = case nod of NodePara -> parent _ -> nod dependencies :: Trees (Cell Node) -> [FilePath] dependencies = foldr (go "") [] where go :: FilePath -> Tree (Cell Node) -> [FilePath] -> [FilePath] go dir (Tree (Cell _ss n) ts) acc = case n of NodeHeader (HeaderDotSlash file) -> (dirfile) : foldr (go (dirtakeDirectory file)) acc ts _ -> foldr (go dir) acc ts -- * Type 'ErrorRead' data ErrorRead = ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void)) | ErrorReadIncludeLoop Location | ErrorReadIO Location IO.IOError deriving (Eq) instance Show ErrorRead where showsPrec _p = \case ErrorReadParser e -> showString (P.parseErrorPretty e) ErrorReadIncludeLoop (Span{..}:|loc) -> showString "ErrorReadIncludeLoop" . showString "\n " . showString span_file . showString (foldMap (\s -> "\n included by "<>show s) loc) ErrorReadIO (_:|loc) err -> showString "ErrorReadIO" . showString "\n " . showsPrec 10 err . showString (foldMap (\s -> "\n in "<>show s) loc)