{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeFamilies #-}
{-# OPTIONS_GHC -fno-warn-tabs #-}
-- | This module defines symantics
-- for regular or context-free grammars.
module Language.Symantic.Parsing.Grammar where

import Control.Applicative (Applicative(..), Alternative(..))
import Control.Monad
import Data.Char as Char
import Data.Foldable hiding (any)
import qualified Data.List as List
import Data.Semigroup hiding (option)
import Data.String (IsString(..))
import Data.Text (Text)
import qualified Data.Text as Text
import Prelude hiding (any)

-- * Class 'Gram_Rule'
type Id a = a -> a
class Gram_Rule p where
	rule :: Text -> Id (p a)
	rule _n = id
	rule1 :: Text -> Id (p a -> p b)
	rule1 _n p = p
	rule2 :: Text -> Id (p a -> p b -> p c)
	rule2 _n p = p
	rule3 :: Text -> Id (p a -> p b -> p c -> p d)
	rule3 _n p = p
	rule4 :: Text -> Id (p a -> p b -> p c -> p d -> p e)
	rule4 _n p = p

-- * Type 'Term'
-- | Terminal grammar.
newtype Term p a
 =      Term { unTerm :: p a }
 deriving (Functor, Gram_Term)

-- ** Class 'Gram_Term'
-- | Symantics for terminal grammars.
class Gram_Term p where
	any    :: p Char
	eof    :: p ()
	char   :: Char -> p Char
	string :: String -> p String
	unicat :: Unicat -> p Char
	range  :: (Char, Char) -> p Char
	-- string = foldr (\c -> (<*>) ((:) <$> char c)) (pure "")
	-- string [] = pure []
	-- string (c:cs) = (:) <$> char c <*> string cs

-- *** Type 'Unicat'
-- | Unicode category.
data Unicat
 = Unicat_Letter
 | Unicat_Mark
 | Unicat_Number
 | Unicat_Punctuation
 | Unicat_Symbol
 | Unicat Char.GeneralCategory
 deriving (Eq, Show)

unicode_categories :: Unicat -> [Char.GeneralCategory]
unicode_categories c =
	case c of
	 Unicat_Letter ->
		 [ UppercaseLetter
		 , LowercaseLetter
		 , TitlecaseLetter
		 , ModifierLetter
		 , OtherLetter
		 ]
	 Unicat_Mark ->
		 [ NonSpacingMark
		 , SpacingCombiningMark
		 , EnclosingMark
		 ]
	 Unicat_Number ->
		 [ DecimalNumber
		 , LetterNumber
		 , OtherNumber
		 ]
	 Unicat_Punctuation ->
		 [ ConnectorPunctuation
		 , DashPunctuation
		 , OpenPunctuation
		 , ClosePunctuation
		 , OtherPunctuation
		 ]
	 Unicat_Symbol ->
		 [ MathSymbol
		 , CurrencySymbol
		 , ModifierSymbol
		 , OtherSymbol
		 ]
	 Unicat cat -> [cat]

-- * Type 'Reg'
-- | Left or right regular grammar.
newtype Reg (lr::LR) p a = Reg { unReg :: p a }
 deriving (IsString, Functor, Gram_Term, Alter)
deriving instance Gram_Rule p => Gram_Rule (Reg lr p)
deriving instance (Functor p, Alter p, Gram_RegL p) => Gram_RegL (RegL p)
deriving instance (Functor p, Alter p, Gram_RegR p) => Gram_RegR (RegR p)

-- ** Type 'LR'
data LR
 = L -- ^ Left
 | R -- ^ Right
 deriving (Eq, Show)
type RegL = Reg 'L
type RegR = Reg 'R

-- ** Class 'Alter'
-- | Like 'Alternative' but without the 'Applicative' super-class,
-- because a regular grammar is not closed under 'Applicative'.
class Alter p where
	nil    :: p a
	(<+>)  :: p a -> p a -> p a
	choice :: [p a] -> p a
	default nil    :: Alternative p => p a
	default (<+>)  :: Alternative p => p a -> p a -> p a
	default choice :: Alternative p => [p a] -> p a
	nil    = empty
	(<+>)  = (<|>)
	choice = foldr (<+>) empty
infixl 3 <+>

-- ** Class 'Gram_RegR'
-- | Symantics for right regular grammars.
class (Functor p, Alter p) => Gram_RegR p where
	(.*>) :: Term p (a -> b) -> RegR p a -> RegR p b
	manyR :: Term p a -> RegR p [a]
	manyR p = (:) <$> p .*> manyR p <+> nil
	someR :: Term p a -> RegR p [a]
	someR p = (:) <$> p .*> manyR p
infixl 4 .*>

-- ** Class 'Gram_RegL'
-- | Symantics for left regular grammars.
class (Functor p, Alter p) => Gram_RegL p where
	(<*.) :: RegL p (a -> b) -> Term p a -> RegL p b
	manyL :: Term p a -> RegL p [a]
	manyL p' = reverse <$> go p'
		where go p = flip (:) <$> go p <*. p <+> nil
	someL :: Term p a -> RegL p [a]
	someL p = (\cs c -> cs ++ [c]) <$> manyL p <*. p
infixl 4 <*.

-- * Type 'CF'
-- | Context-free grammar.
newtype CF p a = CF { unCF :: p a }
 deriving (IsString, Functor, Gram_Term, Applicative, App, Alternative, Alter, Alt)
deriving instance Gram_Rule p => Gram_Rule (CF p)
deriving instance Gram_CF   p => Gram_CF   (CF p)

cf_of_reg :: Reg lr p a -> CF p a
cf_of_reg (Reg p) = CF p

-- ** Class 'Gram_CF'
-- | Symantics for context-free grammars.
class Gram_CF p where
	-- | NOTE: CFL ∩ RL is a CFL.
	-- See ISBN 81-7808-347-7, Theorem 7.27, p.286
	(<&) :: CF p (a -> b) -> Reg lr p a -> CF p b
	(&>) :: Reg lr p (a -> b) -> CF p a -> CF p b
	-- | NOTE: CFL - RL is a CFL.
	-- See ISBN 81-7808-347-7, Theorem 7.29, p.289
	but :: CF p a -> Reg lr p b -> CF p a
infixl 4 <&
infixl 4  &>

-- ** Class 'App'
class Applicative p => App p where
	between :: p open -> p close -> p a -> p a
	between open close p = open *> p <* close

-- ** Class 'Alt'
class Alternative p => Alt p where
	option :: a -> p a -> p a
	option x p = p <|> pure x
	skipMany :: p a -> p ()
	skipMany = void . many
	--manyTill :: p a -> p end -> p [a]
	--manyTill p end = go where go = ([] <$ end) <|> ((:) <$> p <*> go)

-- * Type 'EBNF'
-- | Extended Bachus-Norm Form, following the
-- <http://standards.iso.org/ittf/PubliclyAvailableStandards/s026153_ISO_IEC_14977_1996(E).zip ISO-IEC-14977>
-- notations, augmented with the following notations:
--
-- * @("U+", code_point)@: for <http://unicode.org/versions/Unicode8.0.0/ ISO-IEC-10646> (aka. Unicode).
-- * @(rule, "&", rule)@: for the intersection.
-- * @(rule, "-", rule)@: for the difference.
-- * @(rule, " ", rule)@: for rule application.
data EBNF a = EBNF { unEBNF :: RuleMode -> (Op, LR) -> Text }

runEBNF :: EBNF a -> Text
runEBNF (EBNF p) = p RuleMode_Body (nop, L)

-- | Get textual rendition of given EBNF rule.
renderEBNF :: RuleDef a -> Text
renderEBNF = runEBNF . unRuleDef

ebnf_const :: Text -> EBNF a
ebnf_const t = EBNF $ \_rm _op -> t

-- ** Type 'RuleDef'
newtype RuleDef a = RuleDef { unRuleDef :: EBNF a }
 deriving (Functor, Gram_Term, Applicative, App
 , Alternative, Alter, Alt, Gram_RegL, Gram_RegR, Gram_CF)
deriving instance Gram_RuleDef RuleDef
deriving instance Gram_RuleDef p => Gram_RuleDef (RegR p)
deriving instance Gram_RuleDef p => Gram_RuleDef (RegL p)
deriving instance Gram_RuleDef p => Gram_RuleDef (CF p)

instance Gram_Rule RuleDef where
	rule  n           = rule_def (ebnf_const n)
	rule1 n p a       = rule_def (ebnf_const n `ebnf_arg` unRuleDef a) (p a)
	rule2 n p a b     = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b) (p a b)
	rule3 n p a b c   = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c) (p a b c)
	rule4 n p a b c d = rule_def (ebnf_const n `ebnf_arg` unRuleDef a `ebnf_arg` unRuleDef b `ebnf_arg` unRuleDef c `ebnf_arg` unRuleDef d) (p a b c d)

-- *** Class 'Gram_RuleDef'
class Gram_RuleDef p where
	rule_def :: EBNF () -> p a -> RuleDef a
	rule_arg :: Text -> p a

-- | Helper for 'Gram_Rule' 'EBNF'.
ebnf_arg :: EBNF a -> EBNF b -> EBNF ()
ebnf_arg (EBNF a) (EBNF b) = EBNF $ \bo po -> op_paren po op $
	a bo (op, L) <> " " <> b bo (op, R)
	where op = Op " " 11 AssocL
infixl 5 `ebnf_arg`

instance Gram_RuleDef EBNF where
	rule_arg = ebnf_const
	rule_def call body =
		RuleDef $ EBNF $ \mo po ->
			case mo of
			 RuleMode_Ref -> unEBNF call mo po
			 RuleMode_Body ->
				Text.intercalate " " $ concat $
				 [ [unEBNF call RuleMode_Ref (nop, L)]
				 , ["="]
				 , [unEBNF body RuleMode_Ref (nop, R)]
				 , [";"]
				 ]
instance IsString (EBNF String) where
	fromString = string
instance Show (EBNF a) where
	show = Text.unpack . runEBNF
instance Gram_Rule EBNF where
	rule n p = EBNF $ \rm po ->
		case rm of
		 RuleMode_Body -> unEBNF p RuleMode_Ref po
		 RuleMode_Ref -> n
	rule1 n p a = EBNF $ \rm po ->
		case rm of
		 RuleMode_Body -> unEBNF (p a) RuleMode_Ref po
		 RuleMode_Ref  -> unEBNF (ebnf_const n `ebnf_arg` a) RuleMode_Ref po
	rule2 n p a b = EBNF $ \rm po ->
		case rm of
		 RuleMode_Body -> unEBNF (p a b) RuleMode_Ref po
		 RuleMode_Ref  -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b) RuleMode_Ref po
	rule3 n p a b c = EBNF $ \rm po ->
		case rm of
		 RuleMode_Body -> unEBNF (p a b c) RuleMode_Ref po
		 RuleMode_Ref  -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c) RuleMode_Ref po
	rule4 n p a b c d = EBNF $ \rm po ->
		case rm of
		 RuleMode_Body -> unEBNF (p a b c d) RuleMode_Ref po
		 RuleMode_Ref  -> unEBNF (ebnf_const n `ebnf_arg` a `ebnf_arg` b `ebnf_arg` c `ebnf_arg` d) RuleMode_Ref po
instance Functor EBNF where
	fmap _f (EBNF x) = EBNF x
instance Applicative EBNF where
	pure _ = empty
	EBNF f <*> EBNF x = EBNF $ \bo po -> op_paren po op $
		f bo (op, L) <> ", " <> x bo (op, R)
		where op = Op "," 10 AssocB
instance App EBNF
instance Alternative EBNF where
	empty = ebnf_const $ "\"\""
	EBNF x <|> EBNF y = EBNF $ \bo po -> op_paren po op $
		x bo (op, L) <> " | " <> y bo (op, R)
		where op = Op "|" 2 AssocB
	many (EBNF x) = EBNF $ \rm _po -> "{ " <> x rm (op, L) <> " }"  where op = nop
	some (EBNF x) = EBNF $ \rm _po -> "{ " <> x rm (op, L) <> " }-" where op = nop
instance Alter EBNF where
	choice [] = empty
	choice [p] = p
	choice l@(_:_) = EBNF $ \bo po -> op_paren po op $
		Text.intercalate " | " $
		(unEBNF <$> l) <*> pure bo <*> pure (op, L)
		where op = Op "|" 2 AssocB
instance Alt EBNF
instance Gram_Term EBNF where
	any  = ebnf_const "_"
	eof  = ebnf_const "EOF"
	char = ebnf_const . escape
		where
		escape c | Char.isPrint c && c /= '"' = Text.concat $ ["\"", Text.singleton c, "\""]
		escape c = Text.concat ["U+", Text.pack $ show $ ord c]
	string s =
		case List.break (\c -> not (Char.isPrint c) || c == '"') s of
		 (ps, "")   -> raw ps
		 ("", [c])  -> "" <$ char c
		 (ps, [c])  -> "" <$ raw ps <* char c
		 ("", c:rs) -> "" <$ char c <* string rs
		 (ps, c:rs) -> "" <$ raw ps <* char c <* string rs
		where
		raw cs = ebnf_const $ Text.concat $ ["\"", Text.pack cs, "\""]
	unicat = ebnf_const . Text.pack . show
	range (l, h) = ebnf_const $ Text.concat
		 [ runEBNF $ char l
		 , "…"
		 , runEBNF $ char h
		 ]
instance Gram_RegR EBNF where
	Term f .*> Reg x = Reg $ f <*> x
	manyR = Reg . many . unTerm
	someR = Reg . some . unTerm
instance Gram_RegL EBNF where
	Reg f <*. Term x = Reg $ f <*> x
	manyL = Reg . many . unTerm
	someL = Reg . some . unTerm
instance Gram_CF EBNF where
	CF (EBNF f) <& Reg (EBNF p) = CF $ EBNF $ \bo po -> op_paren po op $
		f bo (op, L) <> " & " <> p bo (op, R)
		where op = Op "&" 4 AssocL
	Reg (EBNF f) &> CF (EBNF p) = CF $ EBNF $ \bo po -> op_paren po op $
		f bo (op, L) <> " & " <> p bo (op, R)
		where op = Op "&" 4 AssocL
	CF (EBNF f) `but` Reg (EBNF p) = CF $ EBNF $ \bo po -> op_paren po op $
		f bo (op, L) <> " - " <> p bo (op, R)
		where op = Op "-" 6 AssocL

-- ** Type 'RuleMode'
data RuleMode
 =   RuleMode_Body -- ^ Generate the body of the rule.
 |   RuleMode_Ref  -- ^ Generate a ref to the rule.
 deriving (Eq, Show)

-- ** Type 'Op'
data Op = Op
 { op_ident :: Text
 , op_prece :: Precedence
 , op_assoc :: Associativity
 } deriving (Eq, Show)

nop :: Op
nop = Op "" 0 AssocN

-- *** Type 'Precedence'
type Precedence = Int

-- *** Type 'Associativity'
data Associativity
 = AssocL | AssocR | AssocN | AssocB
 deriving (Eq, Show)

op_paren
 :: (Semigroup s, IsString s)
 => (Op, LR) -> Op -> s -> s
op_paren (po, lr) op s =
	if op_prece op <= op_prece po && not associate
	 then fromString "(" <> s <> fromString ")"
	 else s
	where
	associate =
		op_ident po == op_ident op &&
		case (lr, op_assoc po) of
		 (_, AssocB) -> True
		 (L, AssocL) -> True
		 (R, AssocR) -> True
		 _           -> False

-- * Class 'Gram_Context'
-- | A monadic backdoor, but limited by 'Context'.
-- In 'CF', the context must obviously not be used to change the parser,
-- but it can be used to change the parsed value,
-- for instance by recording source positions into it.
class Gram_Context p where
	type Context p
	type Context p = ()
	default context :: (Context p ~ ()) => (Context p -> p a) -> p a
	context :: (Context p -> p a) -> p a
	context f = f ()
instance Gram_Context p => Gram_Context (CF p) where
	type Context (CF p) = Context p
	context f = CF $ context (unCF . f)
instance Gram_Context EBNF
instance Gram_Context RuleDef

-- * Class 'Gram_Lexer'
class
 ( Alt p
 , Alter p
 , Alternative p
 , App p
 , Gram_CF p
 , Gram_Rule p
 , Gram_Term p
 ) => Gram_Lexer p where
	commentable :: p () -> p () -> p () -> p ()
	commentable = rule3 "commentable" $ \p line block ->
		skipMany $ choice [p, line, block]
	comment_line :: CF p String -> CF p String
	comment_line prefix = rule "comment_line" $
		prefix *> many (any `but` (void (char '\n') <+> eof))
	comment_block :: CF p String -> Reg lr p String -> CF p String
	comment_block start end = rule "comment_block" $
		start *> many (any `but` void end)
	lexeme :: CF p a -> CF p a
	lexeme = rule1 "lexeme" $ \p -> p
		 <* commentable
			 (void $ char ' ')
			 (void $ comment_line (string "--"))
			 (void $ comment_block (string "{-") (string "-}"))
	parens :: CF p a -> CF p a
	parens = rule1 "parens" $
		between
		 (lexeme $ string "(")
		 (lexeme $ string ")")
	infixrP :: (a -> a -> a) -> CF p a -> CF p sep -> CF p a -> CF p a
	infixrP f =
		rule3 "infixrP" $ \next sep root ->
			(\a -> \case Just b -> f a b; Nothing -> a)
			 <$> next <*> option Nothing (Just <$ sep <*> root)
	inside :: (a -> b) -> CF p begin -> CF p a -> CF p end -> CF p b -> CF p b
	inside f = rule4 "inside" $ \begin i end n ->
		(f <$ begin <*> i <* end) <+> n
	symbol :: String -> CF p String
	symbol = lexeme . string

deriving instance Gram_Lexer p => Gram_Lexer (CF p)
instance Gram_Lexer EBNF
instance Gram_Lexer RuleDef

gram_lexer :: forall p . (Gram_Lexer p, Gram_RuleDef p) => [CF p ()]
gram_lexer =
 [ () <$ commentable (void $ rule_arg "space") (void $ rule_arg "line") (void $ rule_arg "block")
 , () <$ comment_line (rule_arg "prefix")
 , () <$ comment_block (rule_arg "start") (rule_arg "end" :: Reg 'L p String)
 , () <$ lexeme (rule_arg "p")
 , () <$ parens (rule_arg "p")
 , () <$ inside id (rule_arg "begin") (rule_arg "i") (rule_arg "end") (rule_arg "next")
 , () <$ infixrP const (rule_arg "next") (rule_arg "sep") (rule_arg "root")
 ]