module Language.TCT.Read
 ( module Language.TCT.Read.Cell
 , module Language.TCT.Read.Elem
 , module Language.TCT.Read.Token
 , module Language.TCT.Read.Tree
 , module Language.TCT.Read
 ) where

import Control.Applicative (Applicative(..))
import Control.Arrow (left)
import Control.Monad (Monad(..), join, (=<<))
import Data.Bool
import Data.Either (Either(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..), any)
import Data.Function (($), (.))
import Data.Functor ((<$>))
import Data.Functor.Compose (Compose(..))
import Data.List.NonEmpty (NonEmpty(..))
import Data.Semigroup (Semigroup(..))
import Data.Traversable (Traversable(..))
import Data.TreeSeq.Strict (Tree(..), Trees)
import Data.Void (Void)
import System.FilePath ((</>))
import System.IO (FilePath, IO)
import Text.Show (Show(..), showString)
import qualified Control.Monad.Trans.Reader as R
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import qualified System.Directory as Directory
import qualified System.FilePath as FilePath
import qualified System.IO.Error as IO
import qualified Text.Megaparsec as P

import Language.TCT.Debug
import Language.TCT.Tree
import Language.TCT.Cell
import Language.TCT.Read.Cell
import Language.TCT.Read.Elem
import Language.TCT.Read.Tree
import Language.TCT.Read.Token

-- | Parsing is done in two phases:
--
-- 1. indentation-sensitive parsing on 'TL.Text'
-- 2. Pair-sensitive parsing on some 'NodeText's resulting of 1.
readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
readTCT inp = do
	inpFileCanon <-
		Directory.makeRelativeToCurrentDirectory =<<
		Directory.canonicalizePath inp
	goFile (NodeHeader HeaderDash) $
		Span inpFileCanon pos1 pos1:|[]
	where
	goFile :: Node -> NonEmpty Span -> IO (Either ErrorRead (Trees (Cell Node)))
	goFile parentNode spans@(Span{span_file=inpFile}:|inpPath)
	 | any (\Span{span_file} -> span_file == inpFile) inpPath =
		return $ Left $ ErrorReadIncludeLoop spans
	 | otherwise = do
		readFile inpFile >>= \case
		 Left err -> return $ Left $ ErrorReadIO spans err
		 Right inpText ->
			case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of
			 Left err -> return $ Left $ ErrorReadParser err
			 Right trees ->
				(join <$>) . sequence <$>
				traverse
				 (goTree parentNode)
				 (debug0 "readTCTWithIncludes" trees)
	goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node)))
	goTree parNode t@(Tree c@(Cell ss@(Span{span_file}:|_sn) nod) ts) =
		case nod of
		 NodeLower{} -> return $ Right $ pure t
		 -- NOTE: preserve NodeText ""
		 NodeText n | TL.null n -> return $ Right $ pure t
		 NodeText n ->
			case parNode of
			 NodeHeader HeaderBar{}      -> return $ Right $ pure t
			 NodeHeader HeaderEqual{}    -> return $ Right $ pure t
			 NodeHeader HeaderDashDash{} -> return $ Right $ pure t
			 _ ->
				return $ left ErrorReadParser $
				parseTokens <$> parseLexemes (Cell ss n)
		 NodeHeader (HeaderDotSlash incFile) -> do
			incFileCanon <-
				Directory.makeRelativeToCurrentDirectory =<<
				Directory.canonicalizePath
				 (FilePath.takeDirectory span_file </> incFile)
			((pure . Tree c <$>) <$>) $
			 -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl
			 --       to merge nodes accross files, when writing XML
				goFile parNode $ Span incFileCanon pos1 pos1 :| toList ss
		 _ ->
			(pure . Tree c . join <$>) .
			sequence <$> traverse (goTree nod') ts
			where
			-- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
			nod' = case nod of
			 NodePara -> parNode
			 _ -> nod

readFile :: FilePath -> IO (Either IO.IOError TL.Text)
readFile fp =
	(Right . TL.decodeUtf8 <$> BSL.readFile fp)
	`IO.catchIOError` \e ->
		if IO.isAlreadyInUseError e
		|| IO.isDoesNotExistError e
		|| IO.isPermissionError   e
		then return $ Left e
		else IO.ioError e

-- | Useful when producing only an exact source file rendition.
readTCTWithoutIncludes ::
 FilePath -> TL.Text ->
 Either ErrorRead (Trees (Cell Node))
readTCTWithoutIncludes inp txt = do
	trs <-
		left ErrorReadParser $
		(`R.runReader` []) $
		P.runParserT (p_Trees <* P.eof) inp txt
	join <$> traverse (go $ NodeHeader HeaderDash)
	 (debug0 "readTCT" trs)
	where
	go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node))
	go parent t@(Tree c@(Cell ssn nod) ts) =
		case nod of
		 NodeLower{} -> Right $ pure t
		 -- NOTE: preserve NodeText ""
		 NodeText n | TL.null n -> Right $ pure t
		 NodeText n ->
			case parent of
			 NodeHeader HeaderBar{}      -> Right $ pure t
			 NodeHeader HeaderEqual{}    -> Right $ pure t
			 NodeHeader HeaderDashDash{} -> Right $ pure t
			 _ -> left ErrorReadParser $ parseTokens <$> parseLexemes (Cell ssn n)
		 _ -> pure . Tree c . join <$> traverse (go nod') ts
			where
			-- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
			nod' = case nod of
			 NodePara -> parent
			 _ -> nod

dependencies :: Trees (Cell Node) -> [FilePath]
dependencies = foldr f [] . Compose
	where
	f (Cell _ss n) acc =
		case n of
		 NodeHeader (HeaderDotSlash file) -> file:acc
		 _ -> acc

-- * Type 'ErrorRead'
data ErrorRead
 =   ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void))
 |   ErrorReadIncludeLoop Spans
 |   ErrorReadIO Spans IO.IOError
 deriving (Eq)
instance Show ErrorRead where
	showsPrec _p = \case
	 ErrorReadParser e ->
		showString (P.parseErrorPretty e)
	 ErrorReadIncludeLoop (Span{..}:|spans) ->
		showString "ErrorReadIncludeLoop" .
		showString "\n             " . showString span_file .
		showString (foldMap (\s -> "\n included by "<>show s) spans)
	 ErrorReadIO (_:|spans) err ->
		showString "ErrorReadIO" .
		showString "\n " . showsPrec 10 err .
		showString (foldMap (\s -> "\n in "<>show s) spans)