{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.TCT.Read.Token where

import Control.Applicative (Applicative(..), Alternative(..))
import Control.Monad (Monad(..))
import Data.Bool
import Data.Char (Char)
import Data.Either (Either(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..))
import Data.Function (($), (.))
import Data.Functor ((<$>), ($>))
import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (Maybe(..))
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.Sequence (ViewL(..), ViewR(..), (<|))
import Data.TreeSeq.Strict (Tree(..), Trees)
import Data.Tuple (fst,snd)
import Data.Void (Void)
import Prelude (Num(..))
import Text.Show (Show(..))
import qualified Data.Char as Char
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Sequence as Seq
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Char as P

import Language.TCT.Debug
import Language.TCT.Cell
import Language.TCT.Elem
import Language.TCT.Tree
import Language.TCT.Read.Elem
import Language.TCT.Read.Cell

-- * Type 'Pairs'
-- | Right-only Dyck language,
-- to keep track of opened 'Pair's.
type Pairs = (Tokens,[Opening])
type Tokens = Trees (Cell Node)

-- ** Type 'Opening'
-- | An opened 'Pair' and its content so far.
type Opening = (Cell Pair,Tokens)

appendPairsToken :: Pairs -> Tree (Cell Node) -> Pairs
appendPairsToken ps t = appendPairsTokens ps (pure t)

appendPairsText :: Pairs -> Cell TL.Text -> Pairs
appendPairsText ps (Cell sp t) =
	appendPairsToken ps $
		Tree0 $ Cell sp $
			NodeToken $ TokenText t

appendPairsTokens :: Pairs -> Tokens -> Pairs
appendPairsTokens (ts,[])         toks = (ts`unionTokens`toks,[])
appendPairsTokens (ts,(p0,t0):ps) toks = (ts,(p0,t0`unionTokens`toks):ps)

-- | Unify two 'Tokens', merging border 'TokenText's if any.
unionTokens :: Tokens -> Tokens -> Tokens
unionTokens x y =
	case (Seq.viewr x, Seq.viewl y) of
	 (xs :> x0, y0 :< ys) ->
		case (x0,y0) of
		 (  Tree (Cell (Span  fx  bx _ex:| sx) (NodeToken (TokenText tx))) tsx
		  , Tree (Cell (Span _fy _by  ey:|_sy) (NodeToken (TokenText ty))) tsy ) ->
			xs `unionTokens`
			pure (Tree (Cell (Span fx bx ey:|sx) $ NodeToken $ TokenText $ tx <> ty) (tsx<>tsy)) `unionTokens`
			ys
		 _ -> x <> y
	 (EmptyR, _) -> y
	 (_, EmptyL) -> x

unionsTokens :: Foldable f => f Tokens -> Tokens
unionsTokens = foldl' unionTokens mempty

openPair :: Pairs -> Cell Pair -> Pairs
openPair (t,ps) p = (t,(p,mempty):ps)

-- | Close a 'Pair' when there is a matching 'LexemePairClose'.
closePair :: Pairs -> Cell Pair -> Pairs
closePair ps@(_,[]) (Cell ssp p) = -- debug0 "closePair" $
	appendPairsText ps $ Cell ssp $
		snd $ pairBordersDouble p
closePair (t,(cx@(Cell (Span  fx  bx _ex:| sx) px),t1):ts)
              cy@(Cell (Span _fy _by  ey:|_sy) py) = -- debug0 "closePair" $
	case (px,py) of
	 (PairElem nx ax, PairElem ny ay) | nx == ny ->
		appendPairsToken (t,ts) $
		Tree (Cell (Span fx bx ey:|sx) $ NodePair $ PairElem nx as) t1
		where as | null ay   = ax
		         | otherwise = ax<>ay
	 _ | px == py ->
		appendPairsToken (t,ts) $
		Tree (Cell (Span fx bx ey:|sx) $ NodePair px) t1
	 _ ->
		(`closePair` cy) $
		appendPairsTokens
		 (t,ts)
		 (closeImpaired mempty (cx,t1))

-- | Close a 'Pair' when there is no matching 'LexemePairClose'.
closeImpaired :: Tokens -> (Cell Pair, Tokens) -> Tokens
closeImpaired acc (Cell ssp@(s0:|sp) pair, toks) = -- debug0 "closeImpaired" $
	case pair of
	 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
	 PairHash | Just (Cell (Span{span_end}:|_st) tag, rest) <- tagFrom body ->
		Tree0 (Cell (s0{span_end}:|sp) $ NodeToken $ TokenTag tag) <| rest
		-- NOTE: use bp (not bt) to include the '#'
	 _ -> pure open `unionTokens` body
	where
	body = toks `unionTokens` acc
	open = Tree0 $ Cell ssp $ NodeToken $ TokenText $ fst $ pairBordersDouble pair

-- | Close remaining 'Pair's at end of parsing.
closePairs :: Pairs -> Tokens
closePairs (t0,ps) = -- debug0 "closePairs" $
	t0 `unionTokens` foldl' closeImpaired mempty ps

appendLexeme :: Lexeme -> Pairs -> Pairs
appendLexeme lex acc = -- debug2 "appendLexeme" "lex" "acc" $ \lex acc ->
	case lex of
	 LexemePairOpen ps -> foldl' open acc ps
		where
		-- NOTE: insert an empty node to encode <elem></elem>, not <elem/>
		open a p@(Cell (Span{span_end, ..}:|sp) PairElem{}) = openPair a p `appendPairsText` Cell (Span{span_begin=span_end, ..}:|sp) ""
		open a p = openPair a p
	 LexemePairClose ps -> foldl' closePair acc ps
	 LexemePairAny   ps -> foldl' openPair  acc ps
	 {-
	 LexemePairAny ps ->
		appendPairsText acc $ sconcat $
			((fst . pairBordersSingle) <$>) <$> ps
	 -}
	 LexemePairBoth ps -> appendPairsTokens acc $ Seq.fromList $ toList $ Tree0 . (NodePair <$>) <$> ps
	 LexemeEscape   c  -> appendPairsToken  acc $ Tree0 $ NodeToken . TokenEscape <$> c
	 LexemeLink     t  -> appendPairsToken  acc $ Tree0 $ NodeToken . TokenLink <$> t
	 {-LexemeWhite (unCell -> "") -> acc-}
	 -- LexemeWhite (unCell -> Text.all (==' ') -> True) -> acc
	 LexemeWhite    t  -> appendPairsText acc t
	 LexemeAlphaNum t  -> appendPairsText acc t
	 LexemeOther    t  -> appendPairsText acc t
	 LexemeTree     t  -> appendPairsToken acc t
	 LexemeEnd -> acc

appendLexemes :: Pairs -> [Lexeme] -> Pairs
appendLexemes = foldr appendLexeme

-- * Type 'Lexeme'
-- | 'Lexeme's cut the input in the longest chunks of common semantic,
-- this enables 'orientLexemePairAny' to work with a more meaningful context.
data Lexeme
 =   LexemePairOpen  !(NonEmpty (Cell Pair))
 |   LexemePairClose !(NonEmpty (Cell Pair))
 |   LexemePairAny   !(NonEmpty (Cell Pair))
     -- ^ orientation depending on the surrounding 'Lexeme's,
     -- see 'orientLexemePairAny'
 |   LexemePairBoth  !(NonEmpty (Cell Pair))
 |   LexemeEscape    !(Cell Char)
 |   LexemeLink      !(Cell TL.Text)
 |   LexemeWhite     !(Cell TL.Text)
 |   LexemeAlphaNum  !(Cell TL.Text)
 |   LexemeOther     !(Cell TL.Text)
 |   LexemeTree      !(Tree (Cell Node))
 |   LexemeEnd
 deriving (Eq, Show)
instance Pretty Lexeme

parseTokens :: [Lexeme] -> Tokens
parseTokens ps =
	closePairs $
	appendLexemes mempty $
	-- debug0 "Lexemes (post orient)" $
	orientLexemePairAny $ LexemeEnd :
	ps

parseLexemes ::
 Cell TL.Text ->
 Either (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void)) [Lexeme]
parseLexemes = runParserOnCell (p_Lexemes <* P.eof)

-- | Parse 'Lexeme's, returning them in reverse order
-- to apply 'orientLexemePairAny'.
p_Lexemes :: P.Tokens s ~ TL.Text => Parser e s [Lexeme]
p_Lexemes = debugParser "Lexemes" $ go []
	where
	go :: P.Tokens s ~ TL.Text => [Lexeme] -> Parser e s [Lexeme]
	go acc =
		(P.eof $> acc) <|>
		(p_Lexeme >>= \next -> go $ orientLexemePairAny $ next:acc)

-- | Must be called on parsed 'Lexeme's after the prepending of any new 'Lexeme',
-- so that it can try to orient nearby 'LexemePairAny'
-- to 'LexemePairOpen' or 'LexemePairClose'.
orientLexemePairAny :: [Lexeme] -> [Lexeme]
orientLexemePairAny = \case
	 -- LexemeOther (Cell _bx ex x):LexemeOther (Cell by _ey y):acc -> LexemeOther (Cell by ex (x<>y)):acc
	
	 -- "   
	 t@LexemeTree{}:LexemePairAny p:acc -> t:LexemePairClose p:acc
	 w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
	 LexemeEnd:LexemePairAny p:acc -> LexemePairClose p:acc
	 --    "
	 LexemePairAny p:t@LexemeTree{}:acc -> LexemePairOpen p:t:acc
	 LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
	 LexemePairAny p:[] -> LexemePairOpen p:[]
	
	 --    ,,,"
	 LexemePairAny p:a@LexemeOther{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
	 LexemePairAny p:a@LexemeOther{}:[] -> LexemePairOpen p:a:[]
	 -- ",,,   
	 w@LexemeWhite{}:a@LexemeOther{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
	 LexemeEnd:a@LexemeOther{}:LexemePairAny p:acc -> a:LexemePairClose p:acc
	
	 -- ",,,AAA
	 an@LexemeAlphaNum{}:a@LexemeOther{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
	 -- ,,,"AAA
	 an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeOther{}:acc -> an:LexemePairOpen p:a:acc
	
	 -- ")
	 c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
	 -- ("
	 LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
	 
	 -- "(
	 o@LexemePairOpen{}:LexemePairAny p:acc -> o:LexemePairClose p:acc
	 -- )"
	 LexemePairAny p:c@LexemePairClose{}:acc -> LexemePairClose p:c:acc
	
	 acc -> acc

p_Lexeme :: P.Tokens s ~ TL.Text => Parser e s Lexeme
p_Lexeme = debugParser "Lexeme" $
	P.choice
	 [ P.try $ LexemeWhite     <$> p_Cell p_Spaces1
	 , P.try $ LexemePairAny   <$> p_some (p_Cell $ p_satisfyMaybe pairAny)
	 , P.try $ LexemePairBoth  <$> p_some (P.try $ p_Cell p_ElemSingle)
	 , P.try $ LexemePairOpen  <$> p_some (p_Cell $ p_satisfyMaybe pairOpen  <|> P.try p_ElemOpen)
	 , P.try $ LexemePairClose <$> p_some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
	 , P.try $ LexemeEscape    <$> p_Cell p_Escape
	 , P.try $ LexemeLink      <$> p_Cell p_Link
	 , P.try $ LexemeAlphaNum  <$> p_Cell (P.takeWhile1P (Just "AlphaNum") Char.isAlphaNum)
	 ,         LexemeOther     <$> p_Cell (TL.singleton <$> P.anyChar)
	 ]

p_some :: Parser e s a -> Parser e s (NonEmpty a)
p_some p = NonEmpty.fromList <$> P.some p

pairAny :: Char -> Maybe Pair
pairAny = \case
 '-'  -> Just PairDash
 '/'  -> Just PairSlash
 '"'  -> Just PairDoublequote
 '\'' -> Just PairSinglequote
 '`'  -> Just PairBackquote
 '_'  -> Just PairUnderscore
 '*'  -> Just PairStar
 '#'  -> Just PairHash
 _    -> Nothing

pairOpen :: Char -> Maybe Pair
pairOpen = \case
 '('  -> Just PairParen
 '['  -> Just PairBracket
 '{'  -> Just PairBrace
 '«'  -> Just PairFrenchquote
 _    -> Nothing

pairClose :: Char -> Maybe Pair
pairClose = \case
 ')'  -> Just PairParen
 ']'  -> Just PairBracket
 '}'  -> Just PairBrace
 '»'  -> Just PairFrenchquote
 _    -> Nothing

p_Escape :: Parser e s Char
p_Escape = P.char '\\' *> P.satisfy Char.isPrint

p_Link :: P.Tokens s ~ TL.Text => Parser e s TL.Text
p_Link =
	P.try (P.char '<' *> p <* P.char '>') <|>
	p
	where
	p :: P.Tokens s ~ TL.Text => Parser e s TL.Text
	p =
		(\scheme addr -> scheme <> "//" <> addr)
		 <$> P.option "" (P.try p_scheme)
		 <*  P.string "//"
		 <*> p_addr
	p_scheme :: P.Tokens s ~ TL.Text => Parser e s TL.Text
	p_scheme =
		(<> ":")
		 <$> (P.takeWhile1P (Just "scheme") $ \c ->
			Char.isAlphaNum c
			|| c=='_'
			|| c=='-'
			|| c=='+')
		 <* P.char ':'
	p_addr :: P.Tokens s ~ TL.Text => Parser e s TL.Text
	p_addr =
		P.takeWhileP (Just "addr") $ \c ->
			Char.isAlphaNum c
			|| c=='%'
			|| c=='/'
			|| c=='('
			|| c==')'
			|| c=='-'
			|| c=='_'
			|| c=='.'
			|| c=='#'
			|| c=='?'
			|| c=='='

pairBorders :: Foldable f => Pair -> f a -> (TL.Text,TL.Text)
pairBorders p ts | null ts   = pairBordersSingle p
                 | otherwise = pairBordersDouble p

pairBordersSingle :: Pair -> (TL.Text,TL.Text)
pairBordersSingle = \case
 PairElem n as ->
	("<"<>n<>foldMap f as<>"/>","")
	where f (elemAttr_white,ElemAttr{..}) =
		elemAttr_white <>
		elemAttr_name  <>
		elemAttr_open  <>
		elemAttr_value <>
		elemAttr_close
 p -> pairBordersDouble p

pairBordersDouble :: Pair -> (TL.Text,TL.Text)
pairBordersDouble = \case
 PairElem n as -> ("<"<>n<>foldMap f as<>">","</"<>n<>">")
	where f (elemAttr_white,ElemAttr{..}) =
		elemAttr_white <>
		elemAttr_name  <>
		elemAttr_open  <>
		elemAttr_value <>
		elemAttr_close
 PairHash        -> ("#","#")
 PairStar        -> ("*","*")
 PairSlash       -> ("/","/")
 PairUnderscore  -> ("_","_")
 PairDash        -> ("-","-")
 PairBackquote   -> ("`","`")
 PairSinglequote -> ("'","'")
 PairDoublequote -> ("\"","\"")
 PairFrenchquote -> ("«","»")
 PairParen       -> ("(",")")
 PairBrace       -> ("{","}")
 PairBracket     -> ("[","]")

-- * Class 'TagFrom'
class TagFrom a where
	tagFrom :: a -> Maybe (Cell Tag, a)
instance TagFrom Tokens where
	tagFrom ts =
		case Seq.viewl ts of
		 EmptyL -> Nothing
		 Tree0 (Cell ss0@(Span _f0 _b0 e0:|_s0) n) :< ns ->
			case n of
			 NodeToken (TokenText t) ->
				case tagFrom $ Cell ss0 t of
				 Nothing -> Nothing
				 Just (t0,r0) ->
					if TL.null $ unCell r0
					then
						case tagFrom ns of
						 Just (t1@(Cell (Span _f1 b1 _e1:|_s1) _), r1) | e0 == b1 ->
							Just (t0<>t1, r1)
						 _ -> Just (t0, ns)
					else Just (t0, pure n0 `unionTokens` ns)
					where n0 = Tree0 $ NodeToken . TokenText <$> r0
			 _ -> Nothing
		 _ -> Nothing
instance TagFrom (Cell TL.Text) where
	tagFrom (Cell (Span fp bp ep:|sp) t)
	 | (w,r) <- TL.span isTagChar t
	 , not $ TL.null w
	 , ew <- pos_column bp + sum (Text.length <$> TL.toChunks w) =
		Just
		 ( Cell (Span fp bp bp{pos_column=ew}:|sp) w
		 , Cell (Span fp bp{pos_column=ew} ep:|sp) r )
	tagFrom _ = Nothing

isTagChar :: Char -> Bool
isTagChar c =
	Char.isAlphaNum c ||
	c=='·' ||
	case Char.generalCategory c of
	 Char.DashPunctuation -> True
	 Char.ConnectorPunctuation -> True
	 _ -> False

{-
-- | Build 'Tokens' from many 'Token's.
tokens :: [Cell Token] -> Tokens
tokens ts = Seq.fromList $ Tree0 . (NodeToken <$>) <$> ts

-- | Build 'Tokens' from one 'Token'.
tokens1 :: Tree (Cell Node) -> Tokens
tokens1 = Seq.singleton

unTokenElem :: Tokens -> Maybe (Cell (ElemName,ElemAttrs,Tokens))
unTokenElem toks =
	case toList $ {-Seq.dropWhileR isTokenWhite-} toks of
	 [Tree (Cell bp ep (NodePair (PairElem e as))) ts] -> Just (Cell bp ep (e,as,ts))
	 _ -> Nothing

isTokenElem :: Tokens -> Bool
isTokenElem toks =
	case toList $ {-Seq.dropWhileR isTokenWhite-} toks of
	 [Tree (unCell -> NodePair PairElem{}) _] -> True
	 _ -> False
-}