{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ViewPatterns #-}
module Language.TCT.Read.Token where

import Control.Applicative (Applicative(..), Alternative(..))
import Control.Monad (Monad(..))
import Data.Bool
import Data.Char (Char)
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..))
import Data.Function (($), (.))
import Data.Functor ((<$>), ($>), (<$))
import Data.Maybe (Maybe(..))
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.Sequence (ViewL(..), (<|))
import Data.Text (Text)
import Data.Text.Buildable (Buildable(..))
import Data.Tuple (fst,snd)
import Prelude (Num(..))
import Text.Show (Show(..))
import qualified Data.Char as Char
import qualified Data.Sequence as Seq
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as Builder
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Char as P

import Language.TCT.Token
import Language.TCT.Cell
import Language.TCT.Elem
import Language.TCT.Read.Elem
import Language.TCT.Read.Cell

textOf :: Buildable a => a -> Text
textOf = TL.toStrict . Builder.toLazyText . build

-- * Type 'Pairs'
type Pairs = (Tokens,[(Cell Pair,Tokens)])

appendToken :: Pairs -> Cell Token -> Pairs
appendToken ps = appendTokens ps . Seq.singleton

appendTokens :: Pairs -> Tokens -> Pairs
appendTokens (t,[])         toks = (t<>toks,[])
appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)

openPair :: Pairs -> Cell Pair -> Pairs
openPair (t,ms) p = (t,(p,mempty):ms)

-- | Close a 'Pair' when there is a matching 'LexemePairClose'.
closePair :: Pairs -> Cell Pair -> Pairs
closePair ps@(_,[]) (Cell bp ep p) = dbg "closePair" $
	appendToken ps $
	Cell bp ep $
	TokenPlain $ snd $ pairBorders p tokensPlainEmpty
closePair (t,(p1,t1):ts) p = dbg "closePair" $
	case (p1,p) of
	 (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y ->
		appendToken (t,ts) $
		Cell bx ey $
		TokenPair (PairElem x (ax<>ay)) t1
	 (Cell bx _ex x, Cell _by ey y) | x == y ->
		appendToken (t,ts) $
		Cell bx ey $
		TokenPair x t1
	 _ ->
		(`closePair` p) $
		appendTokens
		 (t,ts)
		 (closeUnpaired mempty (p1,t1))

-- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
closeUnpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens
closeUnpaired acc (Cell bp ep p,toks) = dbg "closeUnpaired" $
	case p of
	 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
	 PairHash | (Cell bt et (TokenPlain t)) :< ts <- Seq.viewl $ toks <> acc ->
		case Text.findIndex (not . isTagChar) t of
		 -- Just 0 -> toksHash mempty <> toks <> acc
		 Just i ->
			Cell bp bt{columnPos = columnPos bt + i} (TokenTag tag)
			 <| Cell bt{columnPos = columnPos bt + i + 1} et (TokenPlain t')
			 <| ts
			where (tag,t') = Text.splitAt i t
		 Nothing | Text.null t -> toksHash mempty <> toks <> acc
		 Nothing -> Cell bp et (TokenTag t) <| ts
	 _ -> toksHash tokensPlainEmpty <> toks <> acc
	where
	toksHash = tokens1 . Cell bp ep . TokenPlain . fst . pairBorders p
	isTagChar c =
		Char.isAlphaNum c ||
		c=='·' ||
		case Char.generalCategory c of
		 Char.DashPunctuation -> True
		 Char.ConnectorPunctuation -> True
		 _ -> False

-- | Close remaining 'Pair's at end of parsing.
closePairs :: Pairs -> Tokens
closePairs (t0,ps) = dbg "closePairs" $
	t0 <> foldl' closeUnpaired mempty ps

appendLexeme :: Lexeme -> Pairs -> Pairs
appendLexeme lex acc =
	dbg "appendLexeme" $
	case lex of
	 LexemePairOpen ps -> foldl' open acc ps
		where
		open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Cell ep ep $ TokenPlain "")
		open a p                            = openPair a p
	 LexemePairClose ps -> foldl' closePair acc ps
	 LexemePairAny   ps -> appendTokens acc $ tokens $ ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps
	 LexemePairBoth  ps -> appendTokens acc $ tokens $ ((`TokenPair`mempty) <$>) <$> ps
	 LexemeEscape     c -> appendToken  acc $ TokenEscape <$> c
	 LexemeLink       t -> appendToken  acc $ TokenLink <$> t
	 LexemeWhite (unCell -> "") -> acc
	 LexemeWhite     cs -> appendToken  acc $ TokenPlain <$> cs
	 LexemeAlphaNum  cs -> appendToken  acc $ TokenPlain . Text.pack <$> cs
	 LexemeAny       cs -> appendToken  acc $ TokenPlain . Text.pack <$> cs
	 LexemeToken     ts -> appendTokens acc ts

-- * Type 'Lexeme'
data Lexeme
 =   LexemePairOpen  ![Cell Pair]
 |   LexemePairClose ![Cell Pair]
 |   LexemePairAny   ![Cell Pair]
 |   LexemePairBoth  ![Cell Pair]
 |   LexemeEscape    !(Cell Char)
 |   LexemeLink      !(Cell Text)
 |   LexemeWhite     !(Cell White)
 |   LexemeAlphaNum  !(Cell [Char])
 |   LexemeAny       !(Cell [Char])
 |   LexemeToken     !Tokens
 deriving (Eq, Show)

p_Tokens :: Parser e s Tokens
p_Tokens = pdbg "Tokens" $
	closePairs .
	foldr appendLexeme mempty .
	dbg "Lexemes" .
	mangleLexemes .
	(LexemeWhite (cell0 "") :) <$>
	go [LexemeWhite (cell0 "")]
	where
	go :: [Lexeme] -> Parser e s [Lexeme]
	go acc =
		(P.eof $> acc) <|>
		(p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
	
	mangleLexemes = \case
	 LexemeAny (Cell _bx ex x):LexemeAny (Cell by _ey y):acc -> LexemeAny (Cell by ex (x<>y)):acc
	
	 -- "   
	 w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
	 --    "
	 LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
	
	 --    ,,,"
	 LexemePairAny p:a@LexemeAny{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
	 -- ",,,   
	 w@LexemeWhite{}:a@LexemeAny{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
	
	 -- ",,,AAA
	 an@LexemeAlphaNum{}:a@LexemeAny{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
	 -- ,,,"AAA
	 an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeAny{}:acc -> an:LexemePairOpen p:a:acc
	
	 -- ")
	 c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
	 -- ("
	 LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
	 
	 -- "(
	 o@LexemePairOpen{}:LexemePairAny p:acc -> o:LexemePairClose p:acc
	 -- )"
	 LexemePairAny p:c@LexemePairClose{}:acc -> c:LexemePairClose p:acc
	
	 acc -> acc

pairAny :: Char -> Maybe Pair
pairAny = \case
 '-'  -> Just PairDash
 '/'  -> Just PairSlash
 '"'  -> Just PairDoublequote
 '\'' -> Just PairSinglequote
 '`'  -> Just PairBackquote
 '_'  -> Just PairUnderscore
 '*'  -> Just PairStar
 '#'  -> Just PairHash
 _    -> Nothing

pairOpen :: Char -> Maybe Pair
pairOpen = \case
 '('  -> Just PairParen
 '['  -> Just PairBracket
 '{'  -> Just PairBrace
 '«'  -> Just PairFrenchquote
 _    -> Nothing

pairClose :: Char -> Maybe Pair
pairClose = \case
 ')'  -> Just PairParen
 ']'  -> Just PairBracket
 '}'  -> Just PairBrace
 '»'  -> Just PairFrenchquote
 _    -> Nothing

p_Cell :: Parser e s a -> Parser e s (Cell a)
p_Cell pa = do
	bp <- p_Position
	a  <- pa
	ep <- p_Position
	return $ Cell bp ep a

p_Lexeme :: Parser e s Lexeme
p_Lexeme = pdbg "Lexeme" $
	P.choice
	 [ P.try $ LexemeWhite       <$> p_Cell p_Spaces
	 , P.try $ LexemePairAny     <$> P.some (p_Cell $ p_satisfyMaybe pairAny)
	 , P.try $ LexemePairBoth    <$> P.some (P.try $ p_Cell p_ElemSingle)
	 , P.try $ LexemePairOpen    <$> P.some (p_Cell $ p_satisfyMaybe pairOpen  <|> P.try p_ElemOpen)
	 , P.try $ LexemePairClose   <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
	 , P.try $ LexemeEscape      <$> p_Cell p_Escape
	 , P.try $ LexemeLink        <$> p_Cell p_Link
	 , P.try $ LexemeAlphaNum    <$> p_Cell (P.some p_AlphaNum)
	 ,         LexemeAny         <$> p_Cell (pure <$> P.anyChar)
	 ]

p_AlphaNum :: Parser e s Char
p_AlphaNum = P.satisfy Char.isAlphaNum

p_Escape :: Parser e s Char
p_Escape = P.char '\\' *> P.satisfy Char.isPrint

p_Link :: Parser e s Text
p_Link =
	(\scheme addr -> Text.pack $ scheme <> "//" <> addr)
	 <$> P.option "" (P.try p_scheme)
	 <*  P.string "//"
	 <*> p_addr
	where
	p_scheme =
		(<> ":")
		 <$> P.some (P.satisfy $ \c ->
			Char.isAlphaNum c
			|| c=='_'
			|| c=='-'
			|| c=='+')
		 <* P.char ':'
	p_addr =
		P.many $
			P.satisfy $ \c ->
				Char.isAlphaNum c
				|| c=='%'
				|| c=='/'
				|| c=='('
				|| c==')'
				|| c=='-'
				|| c=='_'
				|| c=='.'
				|| c=='#'
				|| c=='?'
				|| c=='='

p_ElemSingle :: Parser e s Pair
p_ElemSingle = pdbg "ElemSingle" $
	PairElem
	 <$  P.char '<'
	 <*> p_Word
	 <*> p_Attrs
	 <*  P.string "/>"

p_ElemOpen :: Parser e s Pair
p_ElemOpen = pdbg "ElemOpen" $
	PairElem
	 <$  P.char '<'
	 <*> p_Word
	 <*> p_Attrs
	 <*  P.char '>'

p_ElemClose :: Parser e s Pair
p_ElemClose = pdbg "ElemClose" $
	(`PairElem` [])
	 <$  P.string "</"
	 <*> p_Word
	 <*  P.char '>'

{-
p_ElemOpenOrSingle :: Parser e s Pair
p_ElemOpenOrSingle =
	p_ElemOpen >>= \p ->
		P.char    '>' $> LexemePairOpen p <|>
		P.string "/>" $> LexemePairAny  p
-}