{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ViewPatterns #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- | Read DTC from TCT.
module Textphile.DTC.Read.TCT where
import Control.Applicative (Applicative(..), optional)
import Control.Monad (Monad(..))
import Data.Bool
import Data.Default.Class (Default(..))
import Data.Either (Either(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..), all)
import Data.Function (($), (.), const, id)
import Data.Functor ((<$>))
import Data.Int (Int)
import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (Maybe(..), maybe)
import Data.Monoid (Monoid(..), First(..))
import Data.Ord (Ord(..))
import Data.Ratio ((%))
import Data.Semigroup (Semigroup(..))
import Data.Sequence (ViewL(..), (|>))
import Data.String (String)
import Data.Tuple (fst, snd)
import Prelude (error)
import Text.Blaze.DTC (xmlns_dtc)
import Text.Read (readMaybe, Read(..))
import Text.Show (Show(..))
import qualified Control.Monad.Trans.State as S
import qualified Data.Char as Char
import qualified Data.List as List
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Map.Strict as Map
import qualified Data.Ratio as Ratio
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import qualified Data.Text.Lazy as TL
import qualified GHC.Read as Read (expectP)
import qualified Symantic.RNC as RNC
import qualified Symantic.XML as XML
import qualified Text.Megaparsec as P
import qualified Text.Read as Read

import Textphile.TCT hiding (Parser, ErrorRead)
import Textphile.XML (XML, XMLs)
import Textphile.Utils (Nat(..), Nat1(..), succNat1)
import qualified Textphile.DTC.Document as DTC
import qualified Textphile.DTC.Sym as DTC
import qualified Textphile.RNC as RNC
import qualified Textphile.XML as XML
import qualified Textphile.TCT.Cell as TCT

readDTC ::
 DTC.Sym_DTC Parser =>
 XMLs ->
 Either (P.ParseErrorBundle XMLs ErrorRead) DTC.Document
readDTC stateInput = (fst <$>) $ snd $
	P.runParser' (S.runStateT (DTC.document <* P.eof) (def::State)) P.State
	 { P.stateInput
	 , P.stateOffset = 0
	 , P.stateParseErrors = []
	 , P.statePosState =
		error "[BUG] validateXML: getSourcePos is not helpful here, please use annotated source locations"
		-- NOTE: reporting the node number is less helpful
		-- than the source text line and number where the node is;
		-- P.statePosState is only used by P.getSourcePos.
	 }

-- * Type 'State'
data State = State
 { state_posXML :: XML.Pos
 , state_locTCT :: TCT.Location
 } deriving (Eq,Show)
instance Default State where
	def = State
	 { state_posXML = def
	 , state_locTCT = def
	 }

-- * Type 'Parser'
type Parser = S.StateT State (P.Parsec ErrorRead XMLs)

instance RNC.Sym_Rule Parser where
	-- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
	rule _n = id
	arg _n = pure ()
instance RNC.Sym_RNC Parser where
	namespace _p _n = pure ()
	element n p = do
		ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
		p_element n p ts
		where
		expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty
		check (XML.Tree (XML.Sourced src (XML.NodeElem e)) ts)
		 | e == n
		 = Just $ XML.Sourced src $ removePI $ removeXMLNS $ removeSpaces ts
			where
			removePI xs =
				(`Seq.filter` xs) $ \case
					 XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False
					 _ -> True
			removeSpaces xs =
				if (`all` xs) $ \case
				 XML.Tree (XML.unSourced -> XML.NodeText (XML.EscapedText et)) _ts ->
					all (\case
					 XML.EscapedPlain t -> TL.all Char.isSpace t
					 _ -> False) et
				 _ -> True
				then (`Seq.filter` xs) $ \case
					 XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False
					 _ -> True
				else xs
			removeXMLNS xs =
				let (attrs,rest) = (`Seq.spanl` xs) $ \case
					 XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True
					 _ -> False in
				let attrs' = (`Seq.filter` attrs) $ \case
					 XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts ->
						case a of
						 XML.QName "" "xmlns" -> False
						 XML.QName ns _l -> ns /= XML.xmlns_xmlns
					 _ -> True in
				attrs' <> rest
		check _t = Nothing
	attribute n p = do
		ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
		p_XMLs p ts
		where
		expected = XML.Tree0 (XML.notSourced $ XML.NodeAttr n)
		check (XML.Tree (XML.unSourced -> XML.NodeAttr k)
		      v@(toList -> [XML.Tree0 (XML.unSourced -> XML.NodeText _v)])) | k == n =
			Just v
		check _t = Nothing
	any = P.label "any" $
		P.token (const $ Just ())  Set.empty
	anyElem ns p = P.label "anyElem" $ do
		(n,ts) <- P.token check $ Set.singleton $ P.Tokens $ pure expected
		p_XMLs (p $ XML.qNameLocal n) ts
		where
		expected = XML.Tree (XML.notSourced $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty
		check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts)
		 | XML.qNameSpace e == ns
		 = Just $ (e,ts)
		check _t = Nothing
	escapedText = do
		P.token check $ Set.singleton $ P.Tokens $ pure expected
		where
		expected = XML.Tree0 (XML.notSourced $ XML.NodeText $ XML.EscapedText mempty)
		check (XML.Tree0 (XML.unSourced -> XML.NodeText t)) = Just t
		check _t = Nothing
	optional = P.optional
	option   = P.option
	choice   = P.choice
	try      = P.try
	fail     = P.label "fail" $ P.failure Nothing mempty
type instance RNC.Permutation Parser = RNC.Perm Parser
instance RNC.Sym_Permutation Parser where
	runPermutation (RNC.Perm value parser) = optional parser >>= f
		where
		-- NOTE: copy Control.Applicative.Permutations.runPermutation
		-- to replace the commented empty below so that P.TrivialError
		-- has the unexpected token.
		f  Nothing = maybe {-empty-}(P.token (const Nothing) Set.empty) pure value
		f (Just p) = RNC.runPermutation p
	toPermutation p = RNC.Perm Nothing $ pure <$> p
	toPermutationWithDefault v p = RNC.Perm (Just v) $ pure <$> p

instance P.Stream XMLs where
	type Token  XMLs = XML
	type Tokens XMLs = XMLs
	take1_ s =
		case Seq.viewl s of
		 EmptyL -> Nothing
		 t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts
		  | RNC.isIgnoredNode n -> P.take1_ ts
		  | otherwise -> Just (t, ts)
	takeN_ n s | n <= 0    = Just (mempty, s)
	           | null s    = Nothing
	           | otherwise =
		let (ns,rs) = Seq.splitAt n s in
		let (ko,ok) = Seq.partition (RNC.isIgnoredNode . XML.unSourced . XML.unTree) ns in
		case P.takeN_ (Seq.length ko) rs of
		 Nothing -> Just (ok, rs)
		 Just (ns',rs') -> Just (ok<>ns', rs')
	tokensToChunk _s  = Seq.fromList
	chunkToTokens _s  = toList
	chunkLength _s    = Seq.length
	takeWhile_        = Seq.spanl
	-- | NOTE: unimplemented: useless since each 'XML.Node' is annotated with its 'FileSource'.
	reachOffset       = error "[BUG] P.Stream XMLs: reachOffset is not helpful, please use annotated source locations"
	-- | NOTE: useless since each 'XML.Node' is annotated with its 'FileSource'.
	reachOffsetNoLine = error "[BUG] P.Stream XMLs: reachOffsetNoLine is not helpful, please use annotated source locations"
	showTokens _s toks = List.intercalate ", " $ toList $ showTree <$> toks
		where
		showTree :: XML -> String
		showTree (Tree a _ts) =
			showSourced a $ \case
			 XML.NodeAttr n     -> show (remove_XMLNS_DTC n)<>"="
			 XML.NodeCDATA _t   -> "cdata"
			 XML.NodeComment _c -> "comment"
			 XML.NodeElem n     -> "<"<>show (remove_XMLNS_DTC n)<>">"
			 XML.NodePI n _t    -> "processing-instruction"<>show n
			 XML.NodeText _t    -> "text"
		remove_XMLNS_DTC n
		 | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""}
		 | otherwise = n
		
		showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f =
			if null fileRange_file
			then f a
			else f a <> foldMap (\p -> "\n in "<>show p) path

-- | @p_XMLs p xs@ returns a parser parsing @xs@ entirely with @p@,
-- updating 'P.stateOffset' and re-raising any exception.
p_XMLs :: Parser a -> XMLs -> Parser a
p_XMLs p stateInput = do
	s <- S.get
	st <- P.getParserState
	let (st', res) = P.runParser' (S.runStateT (p <* P.eof) s) P.State
		 { P.stateInput  = stateInput
		 , P.stateOffset = P.stateOffset st
		 , P.statePosState = P.PosState
			 { P.pstateInput      = stateInput
			 , P.pstateOffset     = P.stateOffset st
			 , P.pstateSourcePos  = P.pstateSourcePos $ P.statePosState st
			 , P.pstateTabWidth   = P.pos1
			 , P.pstateLinePrefix = ""
			 }
		 }
	P.updateParserState (\ps -> ps{P.stateOffset = P.stateOffset st'})
	case res of
	 Right (a, s') -> do
		S.put s'
		return a
	 Left (P.ParseErrorBundle errs _) ->
		case NonEmpty.head errs of
		 P.TrivialError _o us es -> P.failure us es
			{-
			lift $ P.ParsecT $ \ps _cok cerr _eok _eerr ->
				cerr (P.TrivialError o us es) ps
			-}
		 P.FancyError _o es -> P.fancyFailure es

p_element :: XML.QName -> Parser a -> Cell XMLs -> Parser a
p_element n p (Sourced state_locTCT ts) = do
	let mayNameOrFigureName
		| n == "aside" = Nothing
		-- NOTE: skip aside.
		| n == "figure"
		-- NOTE: special case renaming the current XML.Pos
		-- using the @type attribute to have positions like this:
		--   section1.Quote1
		--   section1.Example1
		--   section1.Quote2
		-- instead of:
		--   section1.figure1
		--   section1.figure2
		--   section1.figure3
		, Just ty <- getFirst $ (`foldMap` ts) $ \case
		 Tree (unSourced -> XML.NodeAttr "type") xs
		  | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs
		  , Just ty <- XML.ncName $ XML.unescapeText t
		  -> First $ Just ty
		 _ -> First Nothing
		= Just $ XML.QName xmlns_dtc ty
		| otherwise = Just n
	case mayNameOrFigureName of
	 Nothing -> do
		st <- S.get
		S.put st{state_locTCT}
		res <- p_XMLs p ts
		S.put st
		return res
	 Just nameOrFigureName -> do
		st@State{state_posXML} <- S.get
		let incrPrecedingSibling name =
			maybe (Nat1 1) succNat1 $
			Map.lookup name $
			XML.pos_precedingSiblings state_posXML
		S.put State
		 { state_posXML = state_posXML
			 -- NOTE: in children, push current name incremented on ancestors
			 -- and reset preceding siblings.
			 { XML.pos_precedingSiblings = mempty
			 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
			 , XML.pos_ancestorsWithFigureNames =
				XML.pos_ancestorsWithFigureNames state_posXML |>
				( nameOrFigureName
				, incrPrecedingSibling nameOrFigureName )
			 }
		 , state_locTCT
		 }
		res <- p_XMLs p ts
		S.put st
		 { state_posXML = state_posXML
			 -- NOTE: after current, increment current name
			 -- and reset ancestors.
			 { XML.pos_precedingSiblings =
				(if n == nameOrFigureName then id
				else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
				Map.insertWith (const succNat1) n (Nat1 1) $
				XML.pos_precedingSiblings state_posXML
			 }
		 }
		return res

instance RNC.Sym_RNC_Extra Parser where
	none = RNC.rule "none" $ P.eof
	comment = do
		s <- P.getInput
		case Seq.viewl s of
		 Tree0 (unSourced -> XML.NodeComment c) :< ts -> do
			P.setInput ts
			return c
		 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
		 EmptyL -> P.failure Nothing ex
		where
		ex = Set.singleton $ P.Tokens $ pure expected
		expected = Tree0 (cell0 $ XML.NodeComment "")
	bool = RNC.rule "bool" $ RNC.text >>= \t ->
		case t of
		 "true"  -> return True
		 "false" -> return False
		 _ -> P.fancyFailure $
			Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
	int = RNC.rule "int" $ RNC.text >>= \t ->
		case readMaybe (TL.unpack t) of
		 Just i -> return i
		 Nothing -> P.fancyFailure $
			Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
	rational = RNC.rule "rational" $ RNC.text >>= \t ->
		case readMaybe (TL.unpack t) of
		 Just (Rational i) | 0 <= i -> return i
		                   | otherwise -> P.fancyFailure $
			Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
		 Nothing -> P.fancyFailure $
			Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
	rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
		case readMaybe (TL.unpack t) of
		 Just (Rational i) | 0 <= i -> return i
		                   | otherwise -> P.fancyFailure $
			Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
		 Nothing -> P.fancyFailure $
			Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
	nat = RNC.rule "nat" $ RNC.int >>= \i ->
		if i >= 0
		then return $ Nat i
		else P.fancyFailure $ Set.singleton $
			P.ErrorCustom $ ErrorRead_Not_Nat i
	nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
		if i > 0
		then return $ Nat1 i
		else P.fancyFailure $ Set.singleton $
			P.ErrorCustom $ ErrorRead_Not_Nat1 i
instance DTC.Sym_DTC Parser where
	positionXML = S.gets state_posXML
	locationTCT = S.gets state_locTCT


-- ** Type 'ErrorRead'
data ErrorRead
 =   ErrorRead_EndOfInput
 |   ErrorRead_Not_Bool TL.Text
 |   ErrorRead_Not_Int TL.Text
 |   ErrorRead_Not_Nat Int
 |   ErrorRead_Not_Nat1 Int
 |   ErrorRead_Not_Rational TL.Text
 |   ErrorRead_Not_Positive TL.Text
 deriving (Eq,Ord,Show)
instance P.ShowErrorComponent ErrorRead where
	showErrorComponent = show

-- ** Type 'Rational'
-- | Wrapper to change the 'Read' instance.
newtype Rational = Rational Ratio.Rational
instance Read Rational where
	readPrec = do
		x <- Read.step readPrec
		Read.expectP (Read.Symbol "/")
		y <- Read.step readPrec
		return $ Rational (x % y)