module Hdoc.TCT.Read
 ( module Hdoc.TCT.Read.Cell
 , module Hdoc.TCT.Read.Elem
 , module Hdoc.TCT.Read.Token
 , module Hdoc.TCT.Read.Tree
 , module Hdoc.TCT.Read
 ) where

import Control.Applicative (Applicative(..))
import Control.Arrow (left)
import Control.Monad (Monad(..), join, (=<<))
import Data.Bool
import Data.Either (Either(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..), any)
import Data.Function (($), (.))
import Data.Functor ((<$>))
import Data.List.NonEmpty (NonEmpty(..))
import Data.Semigroup (Semigroup(..))
import Data.Traversable (Traversable(..))
import Data.TreeSeq.Strict (Tree(..), Trees)
import Data.Void (Void)
import System.FilePath ((</>), takeDirectory)
import System.IO (FilePath, IO)
import Text.Show (Show(..), showString)
import qualified Control.Monad.Trans.Reader as R
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import qualified System.Directory as Directory
import qualified System.FilePath as FilePath
import qualified System.IO.Error as IO
import qualified Text.Megaparsec as P

import Hdoc.TCT.Debug
import Hdoc.TCT.Tree
import Hdoc.TCT.Cell
import Hdoc.TCT.Read.Cell
import Hdoc.TCT.Read.Elem
import Hdoc.TCT.Read.Tree
import Hdoc.TCT.Read.Token

-- | Parsing is done in two phases:
--
-- 1. Indentation-sensitive parsing on 'TL.Text'.
-- 2. 'Pair'-sensitive parsing on some 'NodeText's resulting of 1. .
readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
readTCT inp = do
	inpFileCanon <-
		Directory.makeRelativeToCurrentDirectory =<<
		Directory.canonicalizePath inp
	goFile (NodeHeader HeaderDash) $
		FileRange inpFileCanon pos1 pos1:|[]
	where
	goFile :: Node -> NonEmpty FileRange -> IO (Either ErrorRead (Trees (Cell Node)))
	goFile parentNode loc@(FileRange{fileRange_file=inpFile}:|inpPath)
	 | any (\FileRange{fileRange_file} -> fileRange_file == inpFile) inpPath =
		return $ Left $ ErrorReadIncludeLoop loc
	 | otherwise = do
		readFile inpFile >>= \case
		 Left err -> return $ Left $ ErrorReadIO loc err
		 Right inpText ->
			case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of
			 Left err -> return $ Left $ ErrorReadParser err
			 Right trees ->
				(join <$>) . sequence <$>
				traverse
				 (goTree parentNode)
				 trees
				 -- DEBUG: (debug0 "readTCT" trees)
	goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node)))
	goTree parNode t@(Tree c@(Sourced ss@(FileRange{fileRange_file}:|_sn) nod) ts) =
		case nod of
		 NodeLower{} -> return $ Right $ pure t
		 -- NOTE: preserve NodeText ""
		 NodeText n | TL.null n -> return $ Right $ pure t
		 NodeText n ->
			case parNode of
			 NodeHeader HeaderBar{}      -> return $ Right $ pure t
			 NodeHeader HeaderEqual{}    -> return $ Right $ pure t
			 NodeHeader HeaderDashDash{} -> return $ Right $ pure t
			 _ ->
				return $ left ErrorReadParser $
				parseTokens <$> parseLexemes (Sourced ss n)
		 NodeHeader (HeaderDotSlash incFile) -> do
			incFileCanon <-
				Directory.makeRelativeToCurrentDirectory =<<
				Directory.canonicalizePath
				 (FilePath.takeDirectory fileRange_file </> incFile)
			((pure . Tree c <$>) <$>) $
			 -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl
			 --       to merge nodes accross files, when writing XML
				goFile parNode $ FileRange incFileCanon pos1 pos1 :| toList ss
		 _ ->
			(pure . Tree c . join <$>) .
			sequence <$> traverse (goTree nod') ts
			where
			-- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
			nod' = case nod of
			 NodePara -> parNode
			 _ -> nod

readFile :: FilePath -> IO (Either IO.IOError TL.Text)
readFile fp =
	(Right . TL.decodeUtf8 <$> BSL.readFile fp)
	`IO.catchIOError` \e ->
		if IO.isAlreadyInUseError e
		|| IO.isDoesNotExistError e
		|| IO.isPermissionError   e
		then return $ Left e
		else IO.ioError e

-- | Useful when producing only an exact source file rendition.
readTCTWithoutIncludes ::
 FilePath -> TL.Text ->
 Either ErrorRead (Trees (Cell Node))
readTCTWithoutIncludes inp txt = do
	trees <-
		left ErrorReadParser $
		(`R.runReader` []) $
		P.runParserT (p_Trees <* P.eof) inp txt
	join <$> traverse (go $ NodeHeader HeaderDash) trees
	 -- (debug0 "readTCTWithoutIncludes" trees)
	where
	go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node))
	go parent t@(Tree c@(Sourced ssn nod) ts) =
		case nod of
		 NodeLower{} -> Right $ pure t
		 -- NOTE: preserve NodeText ""
		 NodeText n | TL.null n -> Right $ pure t
		 NodeText n ->
			case parent of
			 NodeHeader HeaderBar{}      -> Right $ pure t
			 NodeHeader HeaderEqual{}    -> Right $ pure t
			 NodeHeader HeaderDashDash{} -> Right $ pure t
			 _ -> left ErrorReadParser $ parseTokens <$> parseLexemes (Sourced ssn n)
		 _ -> pure . Tree c . join <$> traverse (go nod') ts
			where
			-- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
			nod' = case nod of
			 NodePara -> parent
			 _ -> nod

dependencies :: Trees (Cell Node) -> [FilePath]
dependencies = foldr (go "") []
	where
	go :: FilePath -> Tree (Cell Node) -> [FilePath] -> [FilePath]
	go dir (Tree (Sourced _ss n) ts) acc =
		case n of
		 NodeHeader (HeaderDotSlash file) ->
			(dir</>file) :
			foldr (go (dir</>takeDirectory file)) acc ts
		 _ ->
			foldr (go dir) acc ts

-- * Type 'ErrorRead'
data ErrorRead
 =   ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void))
 |   ErrorReadIncludeLoop Location
 |   ErrorReadIO Location IO.IOError
 deriving (Eq)
instance Show ErrorRead where
	showsPrec _p = \case
	 ErrorReadParser e ->
		showString (P.parseErrorPretty e)
	 ErrorReadIncludeLoop (FileRange{..}:|loc) ->
		showString "ErrorReadIncludeLoop" .
		showString "\n             " . showString fileRange_file .
		showString (foldMap (\s -> "\n included by "<>show s) loc)
	 ErrorReadIO (_:|loc) err ->
		showString "ErrorReadIO" .
		showString "\n " . showsPrec 10 err .
		showString (foldMap (\s -> "\n in "<>show s) loc)