{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Rank2Types #-} {-# LANGUAGE TypeFamilies #-} module Language.TCT.Read.Token where import Control.Applicative (Applicative(..), Alternative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.), flip) import Data.Functor ((<$>), ($>), (<$)) import Data.Maybe (Maybe(..), fromMaybe) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Text (Text) import Data.Text.Buildable (Buildable(..)) import Data.Tuple (fst,snd) import Text.Show (Show(..)) import qualified Data.Char as Char import qualified Data.Text as Text import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as Builder import qualified Text.Megaparsec as P import Language.TCT.Token import Language.TCT.Elem import Language.TCT.Read.Elem textOf :: Buildable a => a -> Text textOf = TL.toStrict . Builder.toLazyText . build -- * Type 'Groups' type Groups = (Token,[(Group,Token)]) openGroup :: Group -> Groups -> Groups openGroup g (t,ms) = (t,(g,mempty):ms) groupToken :: Token -> Groups -> Groups groupToken mrk (t,[]) = (t<>mrk,[]) groupToken mrk (t,(g0,m0):gs) = (t,(g0,m0<>mrk):gs) closeGroup :: Group -> Groups -> Groups closeGroup g (t,[]) = (t<>TokenPlain (snd $ groupBorders g mempty),[]) closeGroup g (t,(g1,m1):ms) = case (g,g1) of (GroupElem x ax, GroupElem y ay) | x == y -> groupToken (TokenGroup (GroupElem x (ax<>ay)) m1) (t,ms) (x,y) | x == y -> groupToken (TokenGroup g1 m1) (t,ms) _ -> closeGroup g $ groupToken (TokenPlain (fst $ groupBorders g1 mempty) <> m1) (t,ms) closeGroups :: Groups -> Token closeGroups grps = let (m0,gs) = appendLexeme (LexemeWhite "") grps in foldr (\(g,t) acc -> acc <> TokenPlain (fst $ groupBorders g mempty) <> t) m0 gs -- * Type 'Lexeme' data Lexeme = LexemeGroupOpen Group | LexemeGroupClose Group | LexemeGroupPlain Char | LexemeWhite Text | LexemeWord Text | LexemeToken Token | LexemeEscape Char | LexemeLink Text deriving (Show, Eq) appendLexeme :: Lexeme -> Groups -> Groups appendLexeme lex gs = case dbg "appendLexeme" lex of _ | (tok,(GroupHash,tag):gs') <- gs , (case lex of LexemeWord{} -> False LexemeEscape{} -> False LexemeGroupClose GroupHash -> False _ -> True) -> appendLexeme lex $ groupToken (TokenTag (textOf tag)) (tok,gs') LexemeGroupOpen g -> openGroup g gs LexemeGroupClose g -> closeGroup g gs LexemeGroupPlain c -> groupToken (TokenPlain (Text.singleton c)) gs LexemeWhite wh -> groupToken (TokenPlain wh) gs LexemeWord wo -> groupToken (TokenPlain wo) gs LexemeToken tok -> groupToken tok gs LexemeEscape c -> groupToken (TokenEscape c) gs LexemeLink lnk -> groupToken (TokenLink lnk) gs appendLexemes :: Groups -> [Lexeme] -> Groups appendLexemes = foldl' (flip appendLexeme) -- * Parsers p_Token :: Parser e s Token p_Token = closeGroups <$> p_Groups (mempty,[]) p_Groups :: Groups -> Parser e s Groups p_Groups gs = pdbg "Groups" $ (<|>) (p_Lexemes (mempty == gs) >>= p_Groups . appendLexemes gs) (P.eof $> gs) p_Lexemes :: Bool -> Parser e s [Lexeme] p_Lexemes isBOF = pdbg "Lexemes" $ P.choice [ P.try $ p_GroupClose , P.try $ p_GroupOpen isBOF , P.try $ p_GroupOpenOrClose l_GroupOpenOrClose , P.try $ pure <$> p_White , pure . LexemeWord <$> p_Word ] p_White :: Parser e s Lexeme p_White = pdbg "White" $ LexemeWhite <$> p_Spaces p_PunctOrSym :: Parser e s Char p_PunctOrSym = P.satisfy $ \c -> Char.isPunctuation c || Char.isSymbol c p_GroupOpen :: Bool -> Parser e s [Lexeme] p_GroupOpen isBOF = pdbg "GroupOpen" $ do wh <- if isBOF then return [] else pure <$> p_White ps <- mconcat <$> P.some (P.try $ p_GroupOpenOrClose $ l_GroupOpenAndClose LexemeGroupOpen) return $ wh<>ps p_GroupClose :: Parser e s [Lexeme] p_GroupClose = pdbg "GroupClose" $ do ps <- mconcat <$> P.some (P.try $ p_GroupOpenOrClose $ l_GroupOpenAndClose LexemeGroupClose) wh <- pure <$> p_White <|> P.eof $> [] return $ ps<>wh p_GroupOpenOrClose :: (Char -> Maybe Lexeme) -> Parser e s [Lexeme] p_GroupOpenOrClose grp = pdbg "GroupOpenOrClose" $ do P.choice [ P.try p_Elem , P.try (pure <$> p_Escape) , P.try (pure <$> p_Link) , (<$> p_PunctOrSym) $ \c -> pure $ LexemeGroupPlain c `fromMaybe` grp c ] l_GroupOpenAndClose :: (Group -> Lexeme) -> Char -> Maybe Lexeme l_GroupOpenAndClose lxm c = dbg "GroupOpenAndClose" $ case c of '/' -> Just $ lxm GroupSlash '-' -> Just $ lxm GroupDash '"' -> Just $ lxm GroupDoublequote '\'' -> Just $ lxm GroupSinglequote '`' -> Just $ lxm GroupBackquote '_' -> Just $ lxm GroupUnderscore '*' -> Just $ lxm GroupStar '#' -> Just $ lxm GroupHash _ -> l_GroupOpenOrClose c l_GroupOpenOrClose :: Char -> Maybe Lexeme l_GroupOpenOrClose c = dbg "GroupOpenOrClose" $ case c of '(' -> Just $ LexemeGroupOpen GroupParen '[' -> Just $ LexemeGroupOpen GroupBracket '{' -> Just $ LexemeGroupOpen GroupBrace '«' -> Just $ LexemeGroupOpen GroupFrenchquote ')' -> Just $ LexemeGroupClose GroupParen ']' -> Just $ LexemeGroupClose GroupBracket '}' -> Just $ LexemeGroupClose GroupBrace '»' -> Just $ LexemeGroupClose GroupFrenchquote _ -> Nothing p_Link :: Parser e s Lexeme p_Link = (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr) <$> P.option "" (P.try p_scheme) <*> P.string "//" <*> p_addr where p_scheme = (<>) <$> P.some (P.satisfy $ \c -> Char.isAlphaNum c || c=='_' || c=='-' || c=='+') <*> P.string ":" p_addr = P.many $ P.satisfy $ \c -> Char.isAlphaNum c || c=='%' || c=='/' || c=='(' || c==')' || c=='-' || c=='_' || c=='.' p_Escape :: Parser e s Lexeme p_Escape = LexemeEscape <$ P.char '\\' <*> P.satisfy Char.isPrint p_Elem :: Parser e s [Lexeme] p_Elem = pdbg "Elem" $ P.char '<' >> (p_close <|> p_open) where p_open = (\e as oc -> case oc of True -> [ LexemeGroupOpen $ GroupElem e as , LexemeToken $ Tokens mempty -- same elem for open and close , LexemeGroupClose $ GroupElem e [] ] False -> [LexemeGroupOpen $ GroupElem e as]) <$> p_Word <*> p_Attrs <*> P.option False (True <$ P.char '/') <* P.char '>' p_close = (\e -> [LexemeGroupClose $ GroupElem e []]) <$ P.char '/' <*> p_Word <* P.char '>'