{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE StandaloneDeriving #-}
-- | This module defines symantics
-- for regular or context-free grammars.
--
-- The default grammar can be printed in 'EBNF'
-- with: @cabal test ebnf --show-details=always@.
module Language.Symantic.Parsing.Grammar where

import Control.Applicative (Applicative(..))
import Control.Monad
import qualified Data.Bool as Bool
import qualified Data.Char as Char
import Data.Foldable hiding (any)
import Data.Semigroup hiding (option)
import Data.String (IsString(..))
import Data.Text (Text)
import Prelude hiding (any)

-- * Class 'Gram_Rule'
type Id a = a -> a
class Gram_Rule g where
	rule :: Text -> Id (g a)
	rule _n = id
	rule1 :: Text -> Id (g a -> g b)
	rule1 _n g = g
	rule2 :: Text -> Id (g a -> g b -> g c)
	rule2 _n g = g
	rule3 :: Text -> Id (g a -> g b -> g c -> g d)
	rule3 _n g = g
	rule4 :: Text -> Id (g a -> g b -> g c -> g d -> g e)
	rule4 _n g = g

-- * Type 'Terminal'
-- | Terminal grammar.
newtype Terminal g a
 =      Terminal { unTerminal :: g a }
 deriving (Functor, Gram_Terminal)
deriving instance Gram_Rule g => Gram_Rule (Terminal g)

-- ** Class 'Gram_Terminal'
-- | Symantics for terminal grammars.
class Gram_Terminal g where
	any    :: g Char
	but    :: Terminal g Char -> Terminal g Char -> Terminal g Char
	eoi    :: g ()
	char   :: Char -> g Char
	string :: String -> g String
	unicat :: Unicat -> g Char
	range  :: (Char, Char) -> g Char
	-- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "")
	-- string [] = pure []
	-- string (c:cs) = (:) <$> char c <*> string cs

-- *** Type 'Unicat'
-- | Unicode category.
data Unicat
 = Unicat_Letter
 | Unicat_Mark
 | Unicat_Number
 | Unicat_Punctuation
 | Unicat_Symbol
 | Unicat Char.GeneralCategory
 deriving (Eq, Show)

unicode_categories :: Unicat -> [Char.GeneralCategory]
unicode_categories c =
	case c of
	 Unicat_Letter ->
		 [ Char.UppercaseLetter
		 , Char.LowercaseLetter
		 , Char.TitlecaseLetter
		 , Char.ModifierLetter
		 , Char.OtherLetter
		 ]
	 Unicat_Mark ->
		 [ Char.NonSpacingMark
		 , Char.SpacingCombiningMark
		 , Char.EnclosingMark
		 ]
	 Unicat_Number ->
		 [ Char.DecimalNumber
		 , Char.LetterNumber
		 , Char.OtherNumber
		 ]
	 Unicat_Punctuation ->
		 [ Char.ConnectorPunctuation
		 , Char.DashPunctuation
		 , Char.OpenPunctuation
		 , Char.ClosePunctuation
		 , Char.OtherPunctuation
		 ]
	 Unicat_Symbol ->
		 [ Char.MathSymbol
		 , Char.CurrencySymbol
		 , Char.ModifierSymbol
		 , Char.OtherSymbol
		 ]
	 Unicat cat -> [cat]

-- * Type 'Reg'
-- | Left or right regular grammar.
newtype Reg (lr::LR) g a = Reg { unReg :: g a }
 deriving (IsString, Functor, Gram_Terminal)
deriving instance Alter g => Alter (Reg lr g)
deriving instance Gram_Rule g => Gram_Rule (Reg lr g)
deriving instance (Functor g, Alter g, Gram_RegL g) => Gram_RegL (RegL g)
deriving instance (Functor g, Alter g, Gram_RegR g) => Gram_RegR (RegR g)

reg_of_term :: Terminal g a -> Reg lr g a
reg_of_term (Terminal g) = Reg g

-- ** Type 'LR'
data LR
 = L -- ^ Left
 | R -- ^ Right
 deriving (Eq, Show)
type RegL = Reg 'L
type RegR = Reg 'R

-- ** Class 'Alter'
-- | Like 'Alternative' but without the 'Applicative' super-class,
-- because a regular grammar is not closed under 'Applicative'.
-- And also because the alternative operator has to backtrack
-- when the first alternative fails.
class Alter g where
	empty  :: g a
	(<+>)  :: g a -> g a -> g a
	choice :: [g a] -> g a
	choice = foldr (<+>) empty
deriving instance Alter p => Alter (Terminal p)

infixl 3 <+>

-- ** Class 'Gram_RegR'
-- | Symantics for right regular grammars.
class (Functor g, Alter g) => Gram_RegR g where
	(.*>) :: Terminal g (a -> b) -> RegR g a -> RegR g b
	manyR :: Terminal g a -> RegR g [a]
	manyR g = (:) <$> g .*> manyR g <+> empty
	someR :: Terminal g a -> RegR g [a]
	someR g = (:) <$> g .*> manyR g
infixl 4 .*>

-- ** Class 'Gram_RegL'
-- | Symantics for left regular grammars.
class (Functor g, Alter g) => Gram_RegL g where
	(<*.) :: RegL g (a -> b) -> Terminal g a -> RegL g b
	manyL :: Terminal g a -> RegL g [a]
	manyL g' = reverse <$> go g'
		where go g = flip (:) <$> go g <*. g <+> empty
	someL :: Terminal g a -> RegL g [a]
	someL g = (\cs c -> cs ++ [c]) <$> manyL g <*. g
infixl 4 <*.

-- * Class 'Alt'
class (Alter g, Applicative g) => Alt g where
	option :: a -> g a -> g a
	option x g = g <+> pure x
	optional :: g a -> g (Maybe a)
	optional v = Just <$> v <+> pure Nothing
	many :: g a -> g [a]
	many a = some a <+> pure []
	some :: g a -> g [a]
	some a = (:) <$> a <*> many a
	skipMany :: g a -> g ()
	skipMany = void . many
	--manyTill :: g a -> g end -> g [a]
	--manyTill g end = go where go = ([] <$ end) <|> ((:) <$> g <*> go)

-- * Class 'App'
class Applicative g => App g where
	between :: g open -> g close -> g a -> g a
	between open close g = open *> g <* close

-- * Type 'CF'
-- | Context-free grammar.
newtype CF g a = CF { unCF :: g a }
 deriving (IsString, Functor, Gram_Terminal, Applicative, App)
deriving instance Alter g => Alter (CF g)
deriving instance Alt g => Alt (CF g)
deriving instance Gram_Rule g => Gram_Rule (CF g)
deriving instance Gram_RegL g => Gram_RegL (CF g)
deriving instance Gram_RegR g => Gram_RegR (CF g)
deriving instance Gram_CF   g => Gram_CF   (CF g)

cf_of_term :: Terminal g a -> CF g a
cf_of_term (Terminal g) = CF g

cf_of_reg :: Reg lr g a -> CF g a
cf_of_reg (Reg g) = CF g

-- ** Class 'Gram_CF'
-- | Symantics for context-free grammars.
class Gram_CF g where
	-- | NOTE: CFL ∩ RL is a CFL.
	-- See ISBN 81-7808-347-7, Theorem 7.27, g.286
	(<&) :: CF g (a -> b) -> Reg lr g a -> CF g b
	(&>) :: Reg lr g (a -> b) -> CF g a -> CF g b
	-- | NOTE: CFL - RL is a CFL.
	-- See ISBN 81-7808-347-7, Theorem 7.29, g.289
	minus :: CF g a -> Reg lr g b -> CF g a
infixl 4 <&
infixl 4  &>

-- * Class 'Gram_Meta'
class Gram_Meta meta g where
	metaG :: g (meta -> a) -> g a
instance Gram_Meta meta g => Gram_Meta meta (CF g) where
	metaG = CF . metaG . unCF

-- * Class 'Gram_Lexer'
class
 ( Alt g
 , Alter g
 , App g
 , Gram_CF g
 , Gram_Rule g
 , Gram_Terminal g
 ) => Gram_Lexer g where
	commentable :: g () -> g () -> g () -> g ()
	commentable = rule3 "commentable" $ \g line block ->
		skipMany $ choice [g, line, block]
	comment_line :: CF g String -> CF g String
	comment_line prefix = rule "comment_line" $
		prefix *> many (any `minus` (void (char '\n') <+> eoi))
	comment_block :: CF g String -> Reg lr g String -> CF g String
	comment_block start end = rule "comment_block" $
		start *> many (any `minus` void end)
	lexeme :: CF g a -> CF g a
	lexeme = rule1 "lexeme" $ \g ->
		g <* commentable
		 (void $ char ' ')
		 (void $ comment_line (string "--"))
		 (void $ comment_block (string "{-") (string "-}"))
	parens :: CF g a -> CF g a
	parens = rule1 "parens" $
		between
		 (lexeme $ string "(")
		 (lexeme $ string ")")
	operators
	 :: CF g a -- ^ expression
	 -> CF g (Unifix, a -> a) -- ^ prefix operator
	 -> CF g (Infix , a -> a -> a) -- ^ infix operator
	 -> CF g (Unifix, a -> a) -- ^ postfix operator
	 -> CF g (Either Error_Fixity a)
	operators g prG iG poG =
		(evalOpTree <$>)
		 <$> go g prG iG poG
		where
		go
		 :: CF g a
		 -> CF g (Unifix, a -> a)
		 -> CF g (Infix , a -> a -> a)
		 -> CF g (Unifix, a -> a)
		 -> CF g (Either Error_Fixity (OpTree a))
		go = rule4 "operators" $ \aG preG inG postG ->
			(\pres a posts ->
				let nod_a =
					foldr insert_unifix
					 (foldl' (flip insert_unifix) (OpNode0 a) posts)
					 pres
				in \case
				 Just (in_, b) -> insert_infix nod_a in_ b
				 Nothing -> Right nod_a)
			 <$> many preG
			 <*> aG
			 <*> many postG
			 <*> option Nothing (curry Just <$> inG <*> go aG preG inG postG)
		
		insert_unifix :: (Unifix, a -> a) -> OpTree a -> OpTree a
		insert_unifix a@(uni_a@(Prefix prece_a), op_a) nod_b =
			case nod_b of
			 OpNode0{} -> OpNode1 uni_a op_a nod_b
			 OpNode1 Prefix{} _op_b _nod -> OpNode1 uni_a op_a nod_b
			 OpNode1 uni_b@(Postfix prece_b) op_b nod ->
				case prece_b `compare` prece_a of
				 GT -> OpNode1 uni_a op_a nod_b
				 EQ -> OpNode1 uni_a op_a nod_b
				 LT -> OpNode1 uni_b op_b $ insert_unifix a nod
			 OpNode2 inf_b op_b l r ->
				case infix_prece inf_b `compare` prece_a of
				 GT -> OpNode1 uni_a op_a nod_b
				 EQ -> OpNode1 uni_a op_a nod_b
				 LT -> OpNode2 inf_b op_b (insert_unifix a l) r
		insert_unifix a@(uni_a@(Postfix prece_a), op_a) nod_b =
			case nod_b of
			 OpNode0{} -> OpNode1 uni_a op_a nod_b
			 OpNode1 uni_b@(Prefix prece_b) op_b nod ->
				case prece_b `compare` prece_a of
				 GT -> OpNode1 uni_a op_a nod_b
				 EQ -> OpNode1 uni_a op_a nod_b
				 LT -> OpNode1 uni_b op_b $ insert_unifix a nod
			 OpNode1 Postfix{} _op_b _nod -> OpNode1 uni_a op_a nod_b
			 OpNode2 inf_b op_b l r ->
				case infix_prece inf_b `compare` prece_a of
				 GT -> OpNode1 uni_a op_a nod_b
				 EQ -> OpNode1 uni_a op_a nod_b
				 LT -> OpNode2 inf_b op_b l (insert_unifix a r)
		
		insert_infix
		 :: OpTree a
		 -> (Infix, a -> a -> a)
		 -> Either Error_Fixity (OpTree a)
		 -> Either Error_Fixity (OpTree a)
		insert_infix nod_a in_@(inf_a, op_a) e_nod_b = do
			nod_b <- e_nod_b
			case nod_b of
			 OpNode0{} -> Right $ OpNode2 inf_a op_a nod_a nod_b
			 OpNode1 uni_b op_b nod ->
				case unifix_prece uni_b `compare` infix_prece inf_a of
				 EQ -> Right $ OpNode2 inf_a op_a nod_a nod_b
				 GT -> Right $ OpNode2 inf_a op_a nod_a nod_b
				 LT -> do
					n <- insert_infix nod_a in_ (Right nod)
					Right $ OpNode1 uni_b op_b n
			 OpNode2 inf_b op_b l r ->
				case infix_prece inf_b `compare` infix_prece inf_a of
				 GT -> Right $ OpNode2 inf_a op_a nod_a nod_b
				 LT -> do
					n <- insert_infix nod_a in_ (Right l)
					Right $ OpNode2 inf_b op_b n r
				 EQ ->
					let ass = \case
						 AssocL -> L
						 AssocR -> R
						 AssocB lr -> lr in
					case (ass <$> infix_assoc inf_b, ass <$> infix_assoc inf_a) of
					 (Just L, Just L) -> do
						n <- insert_infix nod_a in_ (Right l)
						Right $ OpNode2 inf_b op_b n r
					 (Just R, Just R) ->
						Right $ OpNode2 inf_a op_a nod_a nod_b
					 _ -> Left $ Error_Fixity_Infix_not_combinable inf_a inf_b
						 -- NOTE: non-associating infix ops
						 -- of the same precedence cannot be mixed.
	infixrG :: CF g a -> CF g (a -> a -> a) -> CF g a
	infixrG = rule2 "infixr" $ \g opG ->
		(\a -> \case
		 Just (op, b) -> a `op` b
		 Nothing -> a)
		 <$> g
		 <*> option Nothing (curry Just <$> opG <*> infixrG g opG)
	infixlG :: CF g a -> CF g (a -> a -> a) -> CF g a
	infixlG = rule2 "infixl" $ \g opG ->
		-- NOTE: infixl uses the same grammar than infixr,
		-- but build the parsed value by applying
		-- the operator in the opposite way.
		($ id) <$> go g opG
		where
		go :: CF g a -> CF g (a -> a -> a) -> CF g ((a -> a) -> a)
		go g opG =
			(\a -> \case
			 Just (op, kb) -> \k -> kb (k a `op`)
			 Nothing -> ($ a))
			 <$> g
			 <*> option Nothing (curry Just <$> opG <*> go g opG)
	inside :: (a -> b) -> CF g begin -> CF g a -> CF g end -> CF g b -> CF g b
	inside f = rule4 "inside" $ \begin i end n ->
		(f <$ begin <*> i <* end) <+> n
	symbol :: String -> CF g String
	symbol = lexeme . string

deriving instance Gram_Lexer g => Gram_Lexer (CF g)

-- ** Type 'Error_Fixity'
data Error_Fixity
 =   Error_Fixity_Infix_not_combinable Infix Infix
 |   Error_Fixity_NeedPostfixOrInfix
 |   Error_Fixity_NeedPrefix
 |   Error_Fixity_NeedPostfix
 |   Error_Fixity_NeedInfix
 deriving (Eq, Show)

-- ** Type 'NeedFixity'
data NeedFixity
 =   NeedPrefix
 |   NeedPostfix
 |   NeedPostfixOrInfix
 deriving (Eq, Ord, Show)

-- ** Type 'Fixity'
data Fixity a
 =   FixityPrefix  Unifix (a -> a)
 |   FixityPostfix Unifix (a -> a)
 |   FixityInfix   Infix  (a -> a -> a)

-- ** Type 'Precedence'
type Precedence = Int

-- ** Type 'Associativity'
-- type Associativity = LR
data Associativity
 =   AssocL -- ^ Associate to the left:  @a ¹ b ² c == (a ¹ b) ² c@
 |   AssocR -- ^ Associate to the right: @a ¹ b ² c == a ¹ (b ² c)@
 |   AssocB LR -- ^ Associate to both side, but to 'LR' when reading.
 deriving (Eq, Show)

-- ** Type 'Unifix'
data Unifix
 =   Prefix  { unifix_prece :: Precedence }
 |   Postfix { unifix_prece :: Precedence }
 deriving (Eq, Show)

-- ** Type 'Infix'
data Infix
 =   Infix
 {   infix_assoc :: Maybe Associativity
 ,   infix_prece :: Precedence
 } deriving (Eq, Show)

infixL :: Precedence -> Infix
infixL = Infix (Just AssocL)

infixR :: Precedence -> Infix
infixR = Infix (Just AssocR)

infixB :: LR -> Precedence -> Infix
infixB = Infix . Just . AssocB

infixN :: Precedence -> Infix
infixN = Infix Nothing

infixN0 :: Infix
infixN0 = infixN 0

infixN5 :: Infix
infixN5 = infixN 5

infix_paren
 :: (Semigroup s, IsString s)
 => (Infix, LR) -> Infix -> s -> s
infix_paren (po, lr) op s =
	if infix_prece op <  infix_prece po
	|| infix_prece op == infix_prece po
	&& Bool.not associate
	 then fromString "(" <> s <> fromString ")"
	 else s
	where
	associate =
		case (lr, infix_assoc po) of
		 (_, Just AssocB{}) -> True
		 (L, Just AssocL) -> True
		 (R, Just AssocR) -> True
		 _ -> False

-- ** Type 'OpTree'
data OpTree a
 =   OpNode0 a
 |   OpNode1 Unifix (a -> a)      (OpTree a)
 |   OpNode2 Infix  (a -> a -> a) (OpTree a) (OpTree a)

-- | Collapse an 'OpTree'.
evalOpTree :: OpTree a -> a
evalOpTree (OpNode0 a) = a
evalOpTree (OpNode1 _uni op n) = op $ evalOpTree n
evalOpTree (OpNode2 _inf op l r) = evalOpTree l `op` evalOpTree r