{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Rank2Types #-} {-# LANGUAGE TypeFamilies #-} module Language.TCT.Read.Token where import Control.Applicative (Applicative(..), Alternative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Functor ((<$>), ($>), (<$)) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (ViewL(..), (<|)) import Data.Text (Text) import Data.Text.Buildable (Buildable(..)) import Data.Tuple (fst,snd) import Text.Show (Show(..)) import qualified Data.Char as Char import qualified Data.Sequence as Seq import qualified Data.Set as Set import qualified Data.Text as Text import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as Builder import qualified Text.Megaparsec as P import qualified Text.Megaparsec.Prim as P import Language.TCT.Token import Language.TCT.Elem -- hiding (dbg) import Language.TCT.Read.Elem -- hiding (pdbg) {- import Debug.Trace (trace) dbg m x = trace (m <> ": " <> show x) x pdbg m p = P.dbg m p -} textOf :: Buildable a => a -> Text textOf = TL.toStrict . Builder.toLazyText . build -- * Type 'Pairs' type Pairs = (Tokens,[(Pair,Tokens)]) appendToken :: Pairs -> Token -> Pairs appendToken ps = appendTokens ps . Tokens . Seq.singleton appendTokens :: Pairs -> Tokens -> Pairs appendTokens (t,[]) toks = (t<>toks,[]) appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps) openPair :: Pairs -> Pair -> Pairs openPair (t,ms) p = (t,(p,mempty):ms) -- | Close a 'Pair' when there is a matching 'LexemePairClose'. closePair :: Pairs -> Pair -> Pairs closePair (t,[]) p = dbg "closePair" $ (t<>tokens1 (TokenPlain $ snd $ pairBorders p tokensPlainEmpty),[]) closePair (t,(p1,t1):ts) p = dbg "closePair" $ case (p,p1) of (PairElem x ax, PairElem y ay) | x == y -> appendToken (t,ts) $ TokenPair (PairElem x (ax<>ay)) t1 (x,y) | x == y -> appendToken (t,ts) $ TokenPair p1 t1 _ -> (`closePair` p) $ appendTokens (t,ts) (closeUnpaired mempty (p1,t1)) -- | Close a 'Pair' when there is not a matching 'LexemePairClose'. closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens closeUnpaired acc (p,toks) = dbg "closeUnpaired" $ case p of -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'. PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ toks <> acc -> case Text.findIndex (not . isTagChar) t of Just 0 -> toksHash <> toks <> acc Just i -> Tokens $ TokenTag tag <| TokenPlain t' <| ts where (tag,t') = Text.splitAt i t Nothing | Text.null t -> toksHash <> toks <> acc Nothing -> Tokens $ TokenTag t <| ts where toksHash = tokens1 $ TokenPlain $ fst $ pairBorders p mempty _ -> tokens1 (TokenPlain $ fst $ pairBorders p tokensPlainEmpty) <> toks <> acc where isTagChar c = Char.isAlphaNum c || c=='·' || case Char.generalCategory c of Char.DashPunctuation -> True Char.ConnectorPunctuation -> True _ -> False -- | Close remaining 'Pair's at end of parsing. closePairs :: Pairs -> Tokens closePairs (t0,ps) = dbg "closePairs" $ t0 <> foldl' closeUnpaired mempty ps appendLexeme :: Lexeme -> Pairs -> Pairs appendLexeme lex acc = dbg "appendLexeme" $ case dbg "appendLexeme" lex of LexemePairOpen ps -> foldl' open acc ps where open a p@PairElem{} = openPair a p `appendToken` TokenPlain "" open a p = openPair a p LexemePairClose ps -> foldl' closePair acc ps LexemePairAny ps -> appendTokens acc $ tokens $ TokenPlain . fst . (`pairBorders` mempty) <$> ps LexemePairBoth ps -> appendTokens acc $ tokens $ (`TokenPair`mempty) <$> ps LexemeEscape c -> appendToken acc $ TokenEscape c LexemeLink t -> appendToken acc $ TokenLink t LexemeWhite "" -> acc LexemeWhite cs -> appendToken acc $ TokenPlain cs LexemeAlphaNum cs -> appendToken acc $ TokenPlain $ Text.pack cs LexemeChar c -> appendToken acc $ TokenPlain $ Text.singleton c LexemeToken ts -> appendTokens acc ts -- * Type 'Lexeme' data Lexeme = LexemePairOpen ![Pair] | LexemePairClose ![Pair] | LexemePairAny ![Pair] | LexemePairBoth ![Pair] | LexemeEscape !Char | LexemeLink !Text | LexemeWhite !White | LexemeAlphaNum ![Char] | LexemeChar !Char | LexemeToken !Tokens deriving (Eq, Show) p_satisfyMaybe :: (P.MonadParsec e s m, P.Token s ~ Char) => (Char -> Maybe a) -> m a p_satisfyMaybe f = P.token testChar Nothing where testChar c = case f c of Just a -> Right a Nothing -> Left (Set.singleton $ P.Tokens $ c:|[], Set.empty, Set.empty) p_Tokens :: Parser e s Tokens p_Tokens = pdbg "Tokens" $ closePairs . foldr appendLexeme mempty . dbg "Lexemes" . mangleLexemes . (LexemeWhite "" :) <$> go [LexemeWhite ""] where go :: [Lexeme] -> Parser e s [Lexeme] go acc = (P.eof $> acc) <|> (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc) mangleLexemes = \case w@LexemeWhite{} :p@LexemePairAny{}:acc -> w:any2close p:acc p@LexemePairAny{}:w@LexemeWhite{} :acc -> any2open p:w:acc l@LexemeAlphaNum{}:c@LexemeChar{} :p@LexemePairAny{}:acc -> l:c:any2close p:acc l@LexemeAlphaNum{}:p@LexemePairAny{}:c@LexemeChar{}:acc -> l:any2open p:c:acc acc -> acc any2close,any2open :: Lexeme -> Lexeme any2close (LexemePairAny ps) = LexemePairClose ps any2close c = c any2open (LexemePairAny ps) = LexemePairOpen ps any2open c = c pairAny :: Char -> Maybe Pair pairAny = \case '-' -> Just PairDash '/' -> Just PairSlash '"' -> Just PairDoublequote '\'' -> Just PairSinglequote '`' -> Just PairBackquote '_' -> Just PairUnderscore '*' -> Just PairStar '#' -> Just PairHash _ -> Nothing pairOpen :: Char -> Maybe Pair pairOpen = \case '(' -> Just PairParen '[' -> Just PairBracket '{' -> Just PairBrace '«' -> Just PairFrenchquote _ -> Nothing pairClose :: Char -> Maybe Pair pairClose = \case ')' -> Just PairParen ']' -> Just PairBracket '}' -> Just PairBrace '»' -> Just PairFrenchquote _ -> Nothing p_Lexeme :: Parser e s Lexeme p_Lexeme = pdbg "Lexeme" $ P.choice [ P.try $ LexemeWhite <$> p_Spaces , P.try $ LexemePairAny <$> P.some (p_satisfyMaybe pairAny) , P.try $ LexemePairBoth <$> P.some (P.try p_ElemSingle) , P.try $ LexemePairOpen <$> P.some (p_satisfyMaybe pairOpen <|> P.try p_ElemOpen) , P.try $ LexemePairClose <$> P.some (p_satisfyMaybe pairClose <|> P.try p_ElemClose) , P.try $ LexemeEscape <$> p_Escape , P.try $ LexemeLink <$> p_Link , P.try $ LexemeAlphaNum <$> P.some p_AlphaNum , LexemeChar <$> P.anyChar ] p_AlphaNum :: Parser e s Char p_AlphaNum = P.satisfy Char.isAlphaNum p_Escape :: Parser e s Char p_Escape = P.char '\\' *> P.satisfy Char.isPrint p_Link :: Parser e s Text p_Link = (\scheme ss addr -> Text.pack $ scheme <> ss <> addr) <$> P.option "" (P.try p_scheme) <*> P.string "//" <*> p_addr where p_scheme = (<>) <$> P.some (P.satisfy $ \c -> Char.isAlphaNum c || c=='_' || c=='-' || c=='+') <*> P.string ":" p_addr = P.many $ P.satisfy $ \c -> Char.isAlphaNum c || c=='%' || c=='/' || c=='(' || c==')' || c=='-' || c=='_' || c=='.' || c=='#' || c=='?' || c=='=' p_ElemSingle :: Parser e s Pair p_ElemSingle = pdbg "ElemSingle" $ PairElem <$ P.char '<' <*> p_Word <*> p_Attrs <* P.string "/>" p_ElemOpen :: Parser e s Pair p_ElemOpen = pdbg "ElemOpen" $ PairElem <$ P.char '<' <*> p_Word <*> p_Attrs <* P.char '>' p_ElemClose :: Parser e s Pair p_ElemClose = pdbg "ElemClose" $ (`PairElem` []) <$ P.string " p_Word <* P.char '>' {- p_ElemOpenOrSingle :: Parser e s Pair p_ElemOpenOrSingle = p_ElemOpen >>= \p -> P.char '>' $> LexemePairOpen p <|> P.string "/>" $> LexemePairAny p -}