{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Rank2Types #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} module Language.TCT.Read.Token where import Control.Applicative (Applicative(..), Alternative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Functor ((<$>), ($>), (<$)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (ViewL(..), (<|)) import Data.Text (Text) import Data.Text.Buildable (Buildable(..)) import Data.Tuple (fst,snd) import Prelude (Num(..)) import Text.Show (Show(..)) import qualified Data.Char as Char import qualified Data.Sequence as Seq import qualified Data.Text as Text import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as Builder import qualified Text.Megaparsec as P import qualified Text.Megaparsec.Char as P import Language.TCT.Token import Language.TCT.Cell import Language.TCT.Elem import Language.TCT.Read.Elem import Language.TCT.Read.Cell textOf :: Buildable a => a -> Text textOf = TL.toStrict . Builder.toLazyText . build -- * Type 'Pairs' type Pairs = (Tokens,[(Cell Pair,Tokens)]) appendToken :: Pairs -> Cell Token -> Pairs appendToken ps = appendTokens ps . Seq.singleton appendTokens :: Pairs -> Tokens -> Pairs appendTokens (t,[]) toks = (t<>toks,[]) appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps) openPair :: Pairs -> Cell Pair -> Pairs openPair (t,ms) p = (t,(p,mempty):ms) -- | Close a 'Pair' when there is a matching 'LexemePairClose'. closePair :: Pairs -> Cell Pair -> Pairs closePair ps@(_,[]) (Cell bp ep p) = dbg "closePair" $ appendToken ps $ Cell bp ep $ TokenPlain $ snd $ pairBorders p tokensPlainEmpty closePair (t,(p1,t1):ts) p = dbg "closePair" $ case (p1,p) of (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y -> appendToken (t,ts) $ Cell bx ey $ TokenPair (PairElem x (ax<>ay)) t1 (Cell bx _ex x, Cell _by ey y) | x == y -> appendToken (t,ts) $ Cell bx ey $ TokenPair x t1 _ -> (`closePair` p) $ appendTokens (t,ts) (closeUnpaired mempty (p1,t1)) -- | Close a 'Pair' when there is not a matching 'LexemePairClose'. closeUnpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens closeUnpaired acc (Cell bp ep p,toks) = dbg "closeUnpaired" $ case p of -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'. PairHash | (Cell bt et (TokenPlain t)) :< ts <- Seq.viewl $ toks <> acc -> case Text.findIndex (not . isTagChar) t of -- Just 0 -> toksHash mempty <> toks <> acc Just i -> Cell bp bt{columnPos = columnPos bt + i} (TokenTag tag) <| Cell bt{columnPos = columnPos bt + i + 1} et (TokenPlain t') <| ts where (tag,t') = Text.splitAt i t Nothing | Text.null t -> toksHash mempty <> toks <> acc Nothing -> Cell bp et (TokenTag t) <| ts _ -> toksHash tokensPlainEmpty <> toks <> acc where toksHash = tokens1 . Cell bp ep . TokenPlain . fst . pairBorders p isTagChar c = Char.isAlphaNum c || c=='·' || case Char.generalCategory c of Char.DashPunctuation -> True Char.ConnectorPunctuation -> True _ -> False -- | Close remaining 'Pair's at end of parsing. closePairs :: Pairs -> Tokens closePairs (t0,ps) = dbg "closePairs" $ t0 <> foldl' closeUnpaired mempty ps appendLexeme :: Lexeme -> Pairs -> Pairs appendLexeme lex acc = dbg "appendLexeme" $ case lex of LexemePairOpen ps -> foldl' open acc ps where open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Cell ep ep $ TokenPlain "") open a p = openPair a p LexemePairClose ps -> foldl' closePair acc ps LexemePairAny ps -> appendTokens acc $ tokens $ ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps LexemePairBoth ps -> appendTokens acc $ tokens $ ((`TokenPair`mempty) <$>) <$> ps LexemeEscape c -> appendToken acc $ TokenEscape <$> c LexemeLink t -> appendToken acc $ TokenLink <$> t LexemeWhite (unCell -> "") -> acc LexemeWhite cs -> appendToken acc $ TokenPlain <$> cs LexemeAlphaNum cs -> appendToken acc $ TokenPlain . Text.pack <$> cs LexemeAny cs -> appendToken acc $ TokenPlain . Text.pack <$> cs LexemeToken ts -> appendTokens acc ts -- * Type 'Lexeme' data Lexeme = LexemePairOpen ![Cell Pair] | LexemePairClose ![Cell Pair] | LexemePairAny ![Cell Pair] | LexemePairBoth ![Cell Pair] | LexemeEscape !(Cell Char) | LexemeLink !(Cell Text) | LexemeWhite !(Cell White) | LexemeAlphaNum !(Cell [Char]) | LexemeAny !(Cell [Char]) | LexemeToken !Tokens deriving (Eq, Show) p_Tokens :: Parser e s Tokens p_Tokens = pdbg "Tokens" $ closePairs . foldr appendLexeme mempty . dbg "Lexemes" . mangleLexemes . (LexemeWhite (cell0 "") :) <$> go [LexemeWhite (cell0 "")] where go :: [Lexeme] -> Parser e s [Lexeme] go acc = (P.eof $> acc) <|> (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc) mangleLexemes = \case LexemeAny (Cell _bx ex x):LexemeAny (Cell by _ey y):acc -> LexemeAny (Cell by ex (x<>y)):acc -- "    w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc --    " LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc --    ,,," LexemePairAny p:a@LexemeAny{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc -- ",,,    w@LexemeWhite{}:a@LexemeAny{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc -- ",,,AAA an@LexemeAlphaNum{}:a@LexemeAny{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc -- ,,,"AAA an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeAny{}:acc -> an:LexemePairOpen p:a:acc -- ") c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc -- (" LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc -- "( o@LexemePairOpen{}:LexemePairAny p:acc -> o:LexemePairClose p:acc -- )" LexemePairAny p:c@LexemePairClose{}:acc -> c:LexemePairClose p:acc acc -> acc pairAny :: Char -> Maybe Pair pairAny = \case '-' -> Just PairDash '/' -> Just PairSlash '"' -> Just PairDoublequote '\'' -> Just PairSinglequote '`' -> Just PairBackquote '_' -> Just PairUnderscore '*' -> Just PairStar '#' -> Just PairHash _ -> Nothing pairOpen :: Char -> Maybe Pair pairOpen = \case '(' -> Just PairParen '[' -> Just PairBracket '{' -> Just PairBrace '«' -> Just PairFrenchquote _ -> Nothing pairClose :: Char -> Maybe Pair pairClose = \case ')' -> Just PairParen ']' -> Just PairBracket '}' -> Just PairBrace '»' -> Just PairFrenchquote _ -> Nothing p_Cell :: Parser e s a -> Parser e s (Cell a) p_Cell pa = do bp <- p_Position a <- pa ep <- p_Position return $ Cell bp ep a p_Lexeme :: Parser e s Lexeme p_Lexeme = pdbg "Lexeme" $ P.choice [ P.try $ LexemeWhite <$> p_Cell p_Spaces , P.try $ LexemePairAny <$> P.some (p_Cell $ p_satisfyMaybe pairAny) , P.try $ LexemePairBoth <$> P.some (P.try $ p_Cell p_ElemSingle) , P.try $ LexemePairOpen <$> P.some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen) , P.try $ LexemePairClose <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose) , P.try $ LexemeEscape <$> p_Cell p_Escape , P.try $ LexemeLink <$> p_Cell p_Link , P.try $ LexemeAlphaNum <$> p_Cell (P.some p_AlphaNum) , LexemeAny <$> p_Cell (pure <$> P.anyChar) ] p_AlphaNum :: Parser e s Char p_AlphaNum = P.satisfy Char.isAlphaNum p_Escape :: Parser e s Char p_Escape = P.char '\\' *> P.satisfy Char.isPrint p_Link :: Parser e s Text p_Link = P.try (P.char '<' *> p <* P.char '>') <|> p where p = (\scheme addr -> Text.pack $ scheme <> "//" <> addr) <$> P.option "" (P.try p_scheme) <* P.string "//" <*> p_addr p_scheme = (<> ":") <$> P.some (P.satisfy $ \c -> Char.isAlphaNum c || c=='_' || c=='-' || c=='+') <* P.char ':' p_addr = P.many $ P.satisfy $ \c -> Char.isAlphaNum c || c=='%' || c=='/' || c=='(' || c==')' || c=='-' || c=='_' || c=='.' || c=='#' || c=='?' || c=='=' p_ElemSingle :: Parser e s Pair p_ElemSingle = pdbg "ElemSingle" $ PairElem <$ P.char '<' <*> p_Word <*> p_Attrs <* P.string "/>" p_ElemOpen :: Parser e s Pair p_ElemOpen = pdbg "ElemOpen" $ PairElem <$ P.char '<' <*> p_Word <*> p_Attrs <* P.char '>' p_ElemClose :: Parser e s Pair p_ElemClose = pdbg "ElemClose" $ (`PairElem` []) <$ P.string " p_Word <* P.char '>' {- p_ElemOpenOrSingle :: Parser e s Pair p_ElemOpenOrSingle = p_ElemOpen >>= \p -> P.char '>' $> LexemePairOpen p <|> P.string "/>" $> LexemePairAny p -}