{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Rank2Types #-} {-# LANGUAGE TypeFamilies #-} module Language.TCT.Read.Token where import Control.Applicative (Applicative(..), Alternative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Foldable (foldr, foldl') import Data.Function (($), (.), flip) import Data.Functor ((<$>), ($>), (<$)) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Text (Text) import Data.Text.Buildable (Buildable(..)) import Data.Tuple (fst,snd) import Text.Show (Show(..)) import qualified Data.Char as Char import qualified Data.Text as Text import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as Builder import qualified Text.Megaparsec as P import Language.TCT.Token import Language.TCT.Elem import Language.TCT.Read.Elem textOf :: Buildable a => a -> Text textOf = TL.toStrict . Builder.toLazyText . build -- * Type 'Groups' type Groups = (Token,[(Group,Token)]) openGroup :: Group -> Groups -> Groups openGroup g (t,ms) = (t,(g,mempty):ms) groupToken :: Token -> Groups -> Groups groupToken mrk (t,[]) = (t<>mrk,[]) groupToken mrk (t,(g0,m0):gs) = (t,(g0,m0<>mrk):gs) closeGroup :: Group -> Groups -> Groups closeGroup g (t,[]) = (t<>TokenPlain (snd $ groupBorders g mempty),[]) closeGroup g (t,(g1,m1):ms) = case (g,g1) of (GroupElem x ax, GroupElem y ay) | x == y -> groupToken (TokenGroup (GroupElem x (ax<>ay)) m1) (t,ms) (x,y) | x == y -> groupToken (TokenGroup g1 m1) (t,ms) _ -> closeGroup g $ groupToken (TokenPlain (fst $ groupBorders g1 mempty) <> m1) (t,ms) closeGroups :: Groups -> Token closeGroups grps = let (m0,gs) = appendLexeme (LexemeWhite "") grps in foldr (\(g,t) acc -> acc <> TokenPlain (fst $ groupBorders g mempty) <> t) m0 gs -- * Type 'Lexeme' data Lexeme = LexemeGroupOpen Group | LexemeGroupClose Group | LexemeGroupPlain Char | LexemeWhite Text | LexemeWord Text | LexemeToken Token | LexemeEscape Char | LexemeLink Text deriving (Show, Eq) appendLexeme :: Lexeme -> Groups -> Groups appendLexeme lex gs = case dbg "appendLexeme" lex of _ | (tok,(GroupHash,tag):gs') <- gs , (case lex of LexemeWord{} -> False LexemeEscape{} -> False LexemeGroupClose GroupHash -> False _ -> True) -> appendLexeme lex $ groupToken (TokenTag (textOf tag)) (tok,gs') LexemeGroupOpen g -> openGroup g gs LexemeGroupClose g -> closeGroup g gs LexemeGroupPlain c -> groupToken (TokenPlain (Text.singleton c)) gs LexemeWhite wh -> groupToken (TokenPlain wh) gs LexemeWord wo -> groupToken (TokenPlain wo) gs LexemeToken tok -> groupToken tok gs LexemeEscape c -> groupToken (TokenEscape c) gs LexemeLink lnk -> groupToken (TokenLink lnk) gs appendLexemes :: Groups -> [Lexeme] -> Groups appendLexemes = foldl' (flip appendLexeme) -- * Parsers p_Token :: Parser e s Token p_Token = closeGroups <$> p_Groups (mempty,[]) p_Groups :: Groups -> Parser e s Groups p_Groups gs = pdbg "Groups" $ (<|>) (p_Lexemes >>= p_Groups . appendLexemes gs) (P.eof $> gs) p_Lexemes :: Parser e s [Lexeme] p_Lexemes = pdbg "Lexemes" $ P.choice [ P.try p_Escape , P.try p_Elem , P.try $ pure <$> p_Link , P.try $ (<>) <$> ({-pure <$> P.try p_Link <|>-} P.some p_GroupClose) <*> (pure <$> p_White <|> P.eof $> []) , P.try $ (:) <$> p_White <*> ({-pure <$> P.try p_Link <|>-} P.some p_GroupOpen) , P.try $ pure <$> (p_PunctOrSym >>= \c -> P.option (LexemeGroupPlain c) $ p_GroupOpenOrClose c) , P.try $ pure <$> p_White , pure . LexemeWord <$> p_Word ] p_White :: Parser e s Lexeme p_White = pdbg "White" $ LexemeWhite <$> p_Spaces p_PunctOrSym :: Parser e s Char p_PunctOrSym = P.satisfy $ \c -> Char.isPunctuation c || Char.isSymbol c p_GroupOpenOrClose :: Char -> Parser e s Lexeme p_GroupOpenOrClose c = pdbg "GroupOpenClose" $ do case c of '(' -> return $ LexemeGroupOpen GroupParen '[' -> return $ LexemeGroupOpen GroupBracket '{' -> return $ LexemeGroupOpen GroupBrace '«' -> return $ LexemeGroupOpen GroupFrenchquote ')' -> return $ LexemeGroupClose GroupParen ']' -> return $ LexemeGroupClose GroupBracket '}' -> return $ LexemeGroupClose GroupBrace '»' -> return $ LexemeGroupClose GroupFrenchquote _ -> fail "GroupOpenOrClose" p_GroupOpen :: Parser e s Lexeme p_GroupOpen = pdbg "GroupOpen" $ do c <- p_PunctOrSym case c of '/' -> open GroupSlash '-' -> open GroupDash '"' -> open GroupDoublequote '\'' -> open GroupSinglequote '`' -> open GroupBackquote '_' -> open GroupUnderscore '*' -> open GroupStar '#' -> open GroupHash _ -> p_GroupOpenOrClose c where open = return . LexemeGroupOpen p_GroupClose :: Parser e s Lexeme p_GroupClose = pdbg "GroupClose" $ do c <- p_PunctOrSym case c of '/' -> close GroupSlash '-' -> close GroupDash '"' -> close GroupDoublequote '\'' -> close GroupSinglequote '`' -> close GroupBackquote '_' -> close GroupUnderscore '*' -> close GroupStar '#' -> close GroupHash _ -> p_GroupOpenOrClose c where close = return . LexemeGroupClose p_Link :: Parser e s Lexeme p_Link = (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr) <$> P.option "" (P.try p_scheme) <*> P.string "//" <*> p_addr where p_scheme = (<>) <$> P.some ( P.satisfy $ \c -> Char.isPrint c && not (Char.isSpace c) && c/='/' && c/=':') <*> P.string ":" p_addr = P.many $ P.satisfy $ \c -> Char.isPrint c && not (Char.isSpace c) && c/='<' && c/='>' p_Escape :: Parser e s [Lexeme] p_Escape = pure . LexemeEscape <$ P.char '\\' <*> P.satisfy Char.isPrint p_Elem :: Parser e s [Lexeme] p_Elem = pdbg "Elem" $ P.char '<' >> (p_close <|> p_open) where p_open = (\e as oc -> case oc of True -> [ LexemeGroupOpen $ GroupElem e as , LexemeToken $ Tokens mempty -- same elem for open and close , LexemeGroupClose $ GroupElem e [] ] False -> [LexemeGroupOpen $ GroupElem e as]) <$> p_Word <*> p_Attrs <*> P.option False (True <$ P.char '/') <* P.char '>' p_close = (\e -> [LexemeGroupClose $ GroupElem e []]) <$ P.char '/' <*> p_Word <* P.char '>'