module Language.TCT.Read
 ( module Language.TCT.Read.Cell
 , module Language.TCT.Read.Elem
 , module Language.TCT.Read.Token
 , module Language.TCT.Read.Tree
 , module Language.TCT.Read
 ) where

import Control.Applicative (Applicative(..))
import Control.Arrow (left)
import Control.Monad (Monad(..), join, (=<<))
import Data.Bool
import Data.Either (Either(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..), any)
import Data.Function (($), (.))
import Data.Functor ((<$>))
import Data.List.NonEmpty (NonEmpty(..))
import Data.Semigroup (Semigroup(..))
import Data.Traversable (Traversable(..))
import Data.TreeSeq.Strict (Tree(..), Trees)
import Data.Void (Void)
import System.FilePath ((</>))
import System.IO (FilePath, IO)
import Text.Show (Show(..), showString)
import qualified Control.Monad.Trans.Reader as R
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import qualified System.Directory as Directory
import qualified System.FilePath as FilePath
import qualified Text.Megaparsec as P

import Language.TCT.Debug
import Language.TCT.Tree
import Language.TCT.Cell
import Language.TCT.Read.Cell
import Language.TCT.Read.Elem
import Language.TCT.Read.Tree
import Language.TCT.Read.Token

-- | Parsing is done in two phases:
--
-- 1. indentation-sensitive parsing on 'TL.Text'
-- 2. Pair-sensitive parsing on some 'NodeText's resulting of 1.
readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
readTCT inp = do
	inpFileCanon <-
		Directory.makeRelativeToCurrentDirectory =<<
		Directory.canonicalizePath inp
	goFile (NodeHeader HeaderDash) $
		Span inpFileCanon pos1 pos1:|[]
	where
	goFile :: Node -> NonEmpty Span -> IO (Either ErrorRead (Trees (Cell Node)))
	goFile parentNode spans@(Span{span_file=inpFile}:|inpPath)
	 | any (\Span{span_file} -> span_file == inpFile) inpPath =
		return $ Left $ ErrorReadIncludeLoop spans
	 | otherwise = do
		inpText <- readFile inpFile
		case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of
		 Left err -> return $ Left $ ErrorReadParser err
		 Right trees ->
			(join <$>) . sequence <$>
			traverse
			 (goTree parentNode)
			 (debug0 "readTCTWithIncludes" trees)
	goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node)))
	goTree parNode t@(Tree c@(Cell ss@(Span{span_file}:|_sn) nod) ts) =
		case nod of
		 NodeLower{} -> return $ Right $ pure t
		 -- NOTE: preserve NodeText ""
		 NodeText n | TL.null n -> return $ Right $ pure t
		 NodeText n ->
			case parNode of
			 NodeHeader HeaderBar{}      -> return $ Right $ pure t
			 NodeHeader HeaderEqual{}    -> return $ Right $ pure t
			 NodeHeader HeaderDashDash{} -> return $ Right $ pure t
			 _ ->
				return $ left ErrorReadParser $
				parseTokens <$> parseLexemes (Cell ss n)
		 NodeHeader (HeaderDotSlash incFile) -> do
			incFileCanon <-
				Directory.makeRelativeToCurrentDirectory =<<
				Directory.canonicalizePath
				 (FilePath.takeDirectory span_file </> incFile)
			((pure . Tree c <$>) <$>) $
			 -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl
			 --       to merge nodes accross files, when writing XML
				goFile parNode $ Span incFileCanon pos1 pos1 :| toList ss
		 _ ->
			(pure . Tree c . join <$>) .
			sequence <$> traverse (goTree nod') ts
			where
			-- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
			nod' = case nod of
			 NodePara -> parNode
			 _ -> nod

readFile :: FilePath -> IO TL.Text
readFile fp = TL.decodeUtf8 <$> BSL.readFile fp

-- | Useful when producing only an exact source file rendition.
readTCTWithoutIncludes ::
 FilePath -> TL.Text ->
 Either ErrorRead (Trees (Cell Node))
readTCTWithoutIncludes inp txt = do
	trs <-
		left ErrorReadParser $
		(`R.runReader` []) $
		P.runParserT (p_Trees <* P.eof) inp txt
	join <$> traverse (go $ NodeHeader HeaderDash)
	 (debug0 "readTCT" trs)
	where
	go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node))
	go parent t@(Tree c@(Cell ssn nod) ts) =
		case nod of
		 NodeLower{} -> Right $ pure t
		 -- NOTE: preserve NodeText ""
		 NodeText n | TL.null n -> Right $ pure t
		 NodeText n ->
			case parent of
			 NodeHeader HeaderBar{}      -> Right $ pure t
			 NodeHeader HeaderEqual{}    -> Right $ pure t
			 NodeHeader HeaderDashDash{} -> Right $ pure t
			 _ -> left ErrorReadParser $ parseTokens <$> parseLexemes (Cell ssn n)
		 _ -> pure . Tree c . join <$> traverse (go nod') ts
			where
			-- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
			nod' = case nod of
			 NodePara -> parent
			 _ -> nod

-- * Type 'ErrorRead'
data ErrorRead
 =   ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void))
 |   ErrorReadIncludeLoop Spans
 deriving (Eq)
instance Show ErrorRead where
	showsPrec _p = \case
	 ErrorReadParser e ->
		showString (P.parseErrorPretty e)
	 ErrorReadIncludeLoop (Span{..}:|spans) ->
		showString "ErrorReadIncludeLoop" .
		showString "\n             " . showString span_file .
		showString (foldMap (\s -> "\n included by "<>show s) spans)