import Control.Monad (Monad(..))
import Data.Bool
import Data.Char (Char)
+import Data.Either (Either(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..))
-import Data.Function (($), (.), flip)
+import Data.Function (($), (.))
import Data.Functor ((<$>), ($>), (<$))
+import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (Maybe(..))
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Text.Show (Show(..))
import qualified Data.Char as Char
import qualified Data.Sequence as Seq
+import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as Builder
import qualified Text.Megaparsec as P
+import qualified Text.Megaparsec.Prim as P
import Language.TCT.Token
import Language.TCT.Elem -- hiding (dbg)
-- * Type 'Pairs'
type Pairs = (Tokens,[(Pair,Tokens)])
-openPair :: Pair -> Pairs -> Pairs
-openPair g (t,ms) = (t,(g,mempty):ms)
+appendToken :: Pairs -> Token -> Pairs
+appendToken ps = appendTokens ps . Tokens . Seq.singleton
-insertToken :: Token -> Pairs -> Pairs
-insertToken tok (t,[]) = (t<>tokens [tok],[])
-insertToken tok (t,(p0,t0):ps) = (t,(p0,t0<>tokens [tok]):ps)
+appendTokens :: Pairs -> Tokens -> Pairs
+appendTokens (t,[]) toks = (t<>toks,[])
+appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
-insertTokens :: Tokens -> Pairs -> Pairs
-insertTokens toks (t,[]) = (t<>toks,[])
-insertTokens toks (t,(p0,t0):ps) = (t,(p0,t0<>toks):ps)
+openPair :: Pairs -> Pair -> Pairs
+openPair (t,ms) p = (t,(p,mempty):ms)
-- | Close a 'Pair' when there is a matching 'LexemePairClose'.
-closePair :: Pair -> Pairs -> Pairs
-closePair p (t,[]) = dbg "closePair" $
- (t<>tokens [TokenPlain $ snd $ pairBorders p $ tokens [TokenPlain ""]],[])
-closePair p (t,(p1,t1):ts) = dbg "closePair" $
+closePair :: Pairs -> Pair -> Pairs
+closePair (t,[]) p = dbg "closePair" $
+ (t<>tokens1 (TokenPlain $ snd $ pairBorders p tokensPlainEmpty),[])
+closePair (t,(p1,t1):ts) p = dbg "closePair" $
case (p,p1) of
(PairElem x ax, PairElem y ay) | x == y ->
- insertToken (TokenPair (PairElem x (ax<>ay)) t1) (t,ts)
- (x,y) | x == y -> insertToken (TokenPair p1 t1) (t,ts)
+ appendToken (t,ts) $ TokenPair (PairElem x (ax<>ay)) t1
+ (x,y) | x == y -> appendToken (t,ts) $ TokenPair p1 t1
_ ->
- closePair p $
- insertTokens
- (closeUnpaired mempty (p1,t1))
+ (`closePair` p) $
+ appendTokens
(t,ts)
+ (closeUnpaired mempty (p1,t1))
-- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens
-closeUnpaired acc (p,tn) = dbg "closeUnpaired" $
+closeUnpaired acc (p,toks) = dbg "closeUnpaired" $
case p of
-- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
- PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ tn <> acc ->
+ PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ toks <> acc ->
case Text.findIndex (not . isTagChar) t of
- Just 0 -> tokens [TokenPlain $ fst $ pairBorders p mempty] <> tn <> acc
+ Just 0 -> toksHash <> toks <> acc
Just i -> Tokens $ TokenTag tag <| TokenPlain t' <| ts
where (tag,t') = Text.splitAt i t
+ Nothing | Text.null t -> toksHash <> toks <> acc
Nothing -> Tokens $ TokenTag t <| ts
- _ -> tokens [TokenPlain $ fst $ pairBorders p $ tokens [TokenPlain ""]] <> tn <> acc
+ where toksHash = tokens1 $ TokenPlain $ fst $ pairBorders p mempty
+ _ -> tokens1 (TokenPlain $ fst $ pairBorders p tokensPlainEmpty) <> toks <> acc
where
isTagChar c =
Char.isAlphaNum c ||
closePairs (t0,ps) = dbg "closePairs" $
t0 <> foldl' closeUnpaired mempty ps
--- * Type 'Lexeme'
-data Lexeme
- = LexemePairOpen !Pair
- | LexemePairClose !Pair
- | LexemePunctOrSym !Char
- | LexemeWhite !Text
- | LexemeWord !Text
- | LexemeToken !Tokens
- | LexemeEscape !Char
- | LexemeLink !Text
- deriving (Show, Eq)
-
appendLexeme :: Lexeme -> Pairs -> Pairs
-appendLexeme lex ps =
+appendLexeme lex acc =
dbg "appendLexeme" $
case dbg "appendLexeme" lex of
- LexemePairOpen p -> openPair p ps
- LexemePairClose p -> closePair p ps
- LexemePunctOrSym c -> insertToken (TokenPlain (Text.singleton c)) ps
- LexemeWhite wh -> insertToken (TokenPlain wh) ps
- LexemeWord wo -> insertToken (TokenPlain wo) ps
- LexemeToken ts -> insertTokens ts ps
- LexemeEscape c -> insertToken (TokenEscape c) ps
- LexemeLink lnk -> insertToken (TokenLink lnk) ps
-
-appendLexemes :: Pairs -> [Lexeme] -> Pairs
-appendLexemes = foldl' (flip appendLexeme)
-
--- * Parsers
-
-p_Tokens :: Parser e s Tokens
-p_Tokens = closePairs <$> p_Pairs (mempty,[])
-
-p_Pairs :: Pairs -> Parser e s Pairs
-p_Pairs gs = pdbg "Pairs" $
- (p_Lexemes (mempty == gs) >>= p_Pairs . appendLexemes gs) <|>
- (P.eof $> gs)
-
-p_Lexemes :: Bool -> Parser e s [Lexeme]
-p_Lexemes isBOF = pdbg "Lexemes" $
- P.choice
- [ P.try $ p_PairCloseWhite
- , P.try $ p_PairWhiteOpen isBOF
- , P.try $ p_PairCloseBorder
- , P.try $ p_PairBorderOpen
- , P.try $ p_PairClose
- , P.try $ p_PairOpen
- , P.try $ pure . LexemePunctOrSym <$> p_PunctOrSym
- , P.try $ pure <$> p_White
- , pure . LexemeWord <$> p_Word
- ]
-
-p_White :: Parser e s Lexeme
-p_White = pdbg "White" $ LexemeWhite <$> p_Spaces
-
-p_PunctOrSym :: Parser e s Char
-p_PunctOrSym =
- P.satisfy $ \c ->
- Char.isPunctuation c ||
- Char.isSymbol c
+ LexemePairOpen ps -> foldl' open acc ps
+ where
+ open a p@PairElem{} = openPair a p `appendToken` TokenPlain ""
+ open a p = openPair a p
+ LexemePairClose ps -> foldl' closePair acc ps
+ LexemePairAny ps -> appendTokens acc $ tokens $ TokenPlain . fst . (`pairBorders` mempty) <$> ps
+ LexemePairBoth ps -> appendTokens acc $ tokens $ (`TokenPair`mempty) <$> ps
+ LexemeEscape c -> appendToken acc $ TokenEscape c
+ LexemeLink t -> appendToken acc $ TokenLink t
+ LexemeWhite cs -> appendToken acc $ TokenPlain cs
+ LexemeAlphaNum cs -> appendToken acc $ TokenPlain $ Text.pack cs
+ LexemeChar c -> appendToken acc $ TokenPlain $ Text.singleton c
+ LexemeToken ts -> appendTokens acc ts
-p_PairCloseWhite :: Parser e s [Lexeme]
-p_PairCloseWhite = pdbg "PairCloseWhite" $
- (\c b o -> mconcat c <> b <> mconcat o)
- <$> P.some (P.try $
- P.try p_ElemOpen <|>
- P.try p_ElemClose <|>
- P.try p_PairClose <|>
- pure . LexemePunctOrSym <$> p_PunctOrSym)
- <*> (pure <$> p_White <|> P.eof $> [])
- <*> P.many (P.try $
- P.try p_ElemOpen <|>
- P.try p_ElemClose <|>
- P.try p_PairOpen <|>
- pure . LexemePunctOrSym <$> p_PunctOrSym)
-
-p_PairWhiteOpen :: Bool -> Parser e s [Lexeme]
-p_PairWhiteOpen isBOF = pdbg "PairWhiteOpen" $
- (\b o -> b <> mconcat o)
- <$> (if isBOF then return [] else pure <$> p_White)
- <*> P.some (P.try $
- P.try p_ElemOpen <|>
- P.try p_ElemClose <|>
- P.try p_PairOpen <|>
- pure . LexemePunctOrSym <$> p_PunctOrSym)
-
-p_PairCloseBorder :: Parser e s [Lexeme]
-p_PairCloseBorder = pdbg "PairCloseBorder" $
- P.try p0 <|> p1
+-- * Type 'Lexeme'
+data Lexeme
+ = LexemePairOpen ![Pair]
+ | LexemePairClose ![Pair]
+ | LexemePairAny ![Pair]
+ | LexemePairBoth ![Pair]
+ | LexemeEscape !Char
+ | LexemeLink !Text
+ | LexemeWhite !White
+ | LexemeAlphaNum ![Char]
+ | LexemeChar !Char
+ | LexemeToken !Tokens
+ deriving (Eq, Show)
+
+p_satisfyMaybe :: (P.MonadParsec e s m, P.Token s ~ Char) => (Char -> Maybe a) -> m a
+p_satisfyMaybe f = P.token testChar Nothing
where
- p0 =
- (\c b -> mconcat $ c <> b)
- <$> P.some (P.try p_PairClose)
- <*> P.some (P.try $
- P.choice
- [ P.try p_ElemOpen
- , P.try p_ElemClose
- , do
- c <- p_PunctOrSym
- case l_PairOpen c <|> l_PairClose c of
- Just l -> return [l]
- Nothing -> fail "PairCloseBorder"
- ])
- p1 =
- (\c b -> mconcat c <> [LexemePunctOrSym b])
- <$> P.some (P.try p_PairClose)
- <*> p_PunctOrSym
+ testChar c =
+ case f c of
+ Just a -> Right a
+ Nothing -> Left (Set.singleton $ P.Tokens $ c:|[], Set.empty, Set.empty)
-p_PairBorderOpen :: Parser e s [Lexeme]
-p_PairBorderOpen = pdbg "PairBorderOpen" $
- P.try p0 <|> p1
+p_Tokens :: Parser e s Tokens
+p_Tokens = pdbg "Tokens" $
+ closePairs .
+ foldr appendLexeme mempty .
+ dbg "Lexemes" .
+ mangleLexemes .
+ (LexemeWhite "" :) <$>
+ go [LexemeWhite ""]
where
- p0 =
- (\b o -> mconcat $ b <> o)
- <$> P.some (P.try $
- P.choice
- [ P.try p_ElemOpen
- , P.try p_ElemClose
- , do
- c <- p_PunctOrSym
- case l_PairOpen c <|> l_PairClose c of
- Just l -> return [l]
- Nothing -> fail "PairBorderOpen"
- ])
- <*> P.some (P.try p_PairOpen)
- p1 =
- (\b o -> LexemePunctOrSym b : mconcat o)
- <$> p_PunctOrSym
- <*> P.some (P.try p_PairOpen)
-
-p_PairOpen :: Parser e s [Lexeme]
-p_PairOpen = pdbg "PairOpen" $ do
+ go :: [Lexeme] -> Parser e s [Lexeme]
+ go acc =
+ (P.eof $> acc) <|>
+ (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
+
+ mangleLexemes = \case
+ w@LexemeWhite{} :p@LexemePairAny{}:acc -> w:any2close p:acc
+ p@LexemePairAny{}:w@LexemeWhite{} :acc -> any2open p:w:acc
+
+ l@LexemeAlphaNum{}:c@LexemeChar{} :p@LexemePairAny{}:acc -> l:c:any2close p:acc
+ l@LexemeAlphaNum{}:p@LexemePairAny{}:c@LexemeChar{}:acc -> l:any2open p:c:acc
+
+ acc -> acc
+
+ any2close,any2open :: Lexeme -> Lexeme
+ any2close (LexemePairAny ps) = LexemePairClose ps
+ any2close c = c
+ any2open (LexemePairAny ps) = LexemePairOpen ps
+ any2open c = c
+
+pairAny :: Char -> Maybe Pair
+pairAny = \case
+ '-' -> Just PairDash
+ '/' -> Just PairSlash
+ '"' -> Just PairDoublequote
+ '\'' -> Just PairSinglequote
+ '`' -> Just PairBackquote
+ '_' -> Just PairUnderscore
+ '*' -> Just PairStar
+ '#' -> Just PairHash
+ _ -> Nothing
+
+pairOpen :: Char -> Maybe Pair
+pairOpen = \case
+ '(' -> Just PairParen
+ '[' -> Just PairBracket
+ '{' -> Just PairBrace
+ '«' -> Just PairFrenchquote
+ _ -> Nothing
+
+pairClose :: Char -> Maybe Pair
+pairClose = \case
+ ')' -> Just PairParen
+ ']' -> Just PairBracket
+ '}' -> Just PairBrace
+ '»' -> Just PairFrenchquote
+ _ -> Nothing
+
+p_Lexeme :: Parser e s Lexeme
+p_Lexeme = pdbg "Lexeme" $
P.choice
- [ P.try p_ElemOpen
- , P.try (pure <$> p_Escape)
- , P.try (pure <$> p_Link)
- , do
- c <- p_PunctOrSym
- case l_PairOpenOrClose LexemePairOpen c of
- Just l -> return [l]
- _ -> fail "PairOpen"
+ [ P.try $ LexemeWhite <$> p_Spaces
+ , P.try $ LexemePairAny <$> P.some (p_satisfyMaybe pairAny)
+ , P.try $ LexemePairBoth <$> P.some (P.try p_ElemSingle)
+ , P.try $ LexemePairOpen <$> P.some (p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
+ , P.try $ LexemePairClose <$> P.some (p_satisfyMaybe pairClose <|> P.try p_ElemClose)
+ , P.try $ LexemeEscape <$> p_Escape
+ , P.try $ LexemeLink <$> p_Link
+ , P.try $ LexemeAlphaNum <$> P.some p_AlphaNum
+ , LexemeChar <$> P.anyChar
]
-p_PairClose :: Parser e s [Lexeme]
-p_PairClose = pdbg "PairClose" $ do
- P.choice
- [ P.try p_ElemClose
- , P.try p_ElemSingle
- , P.try (pure <$> p_Escape)
- , P.try (pure <$> p_Link)
- , do
- c <- p_PunctOrSym
- case l_PairOpenOrClose LexemePairClose c of
- Just l -> return [l]
- _ -> fail "PairClose"
- ]
+p_AlphaNum :: Parser e s Char
+p_AlphaNum = P.satisfy Char.isAlphaNum
-l_PairOpenOrClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
-l_PairOpenOrClose lxm c =
- l_PairOpenAndClose lxm c <|>
- l_PairOpen c <|>
- l_PairClose c
+p_Escape :: Parser e s Char
+p_Escape = P.char '\\' *> P.satisfy Char.isPrint
-l_PairOpenAndClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
-l_PairOpenAndClose lxm c =
- case c of
- '/' -> Just $ lxm PairSlash
- '-' -> Just $ lxm PairDash
- '"' -> Just $ lxm PairDoublequote
- '\'' -> Just $ lxm PairSinglequote
- '`' -> Just $ lxm PairBackquote
- '_' -> Just $ lxm PairUnderscore
- '*' -> Just $ lxm PairStar
- '#' -> Just $ lxm PairHash
- _ -> Nothing
-
-l_PairOpen :: Char -> Maybe Lexeme
-l_PairOpen c =
- case c of
- '(' -> Just $ LexemePairOpen PairParen
- '[' -> Just $ LexemePairOpen PairBracket
- '{' -> Just $ LexemePairOpen PairBrace
- '«' -> Just $ LexemePairOpen PairFrenchquote
- _ -> Nothing
-
-l_PairClose :: Char -> Maybe Lexeme
-l_PairClose c =
- case c of
- ')' -> Just $ LexemePairClose PairParen
- ']' -> Just $ LexemePairClose PairBracket
- '}' -> Just $ LexemePairClose PairBrace
- '»' -> Just $ LexemePairClose PairFrenchquote
- _ -> Nothing
-
-p_Link :: Parser e s Lexeme
+p_Link :: Parser e s Text
p_Link =
- (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
+ (\scheme ss addr -> Text.pack $ scheme <> ss <> addr)
<$> P.option "" (P.try p_scheme)
<*> P.string "//"
<*> p_addr
|| c=='?'
|| c=='='
-p_Escape :: Parser e s Lexeme
-p_Escape =
- LexemeEscape
- <$ P.char '\\'
- <*> P.satisfy Char.isPrint
-
-p_ElemSingle :: Parser e s [Lexeme]
+p_ElemSingle :: Parser e s Pair
p_ElemSingle = pdbg "ElemSingle" $
- (\e as ->
- [ LexemePairOpen $ PairElem e as
- , LexemeToken $ mempty
- , LexemePairClose $ PairElem e [] ])
+ PairElem
<$ P.char '<'
<*> p_Word
<*> p_Attrs
<* P.string "/>"
-p_ElemOpen :: Parser e s [Lexeme]
+p_ElemOpen :: Parser e s Pair
p_ElemOpen = pdbg "ElemOpen" $
- (\e as oc ->
- case oc of
- True -> [ LexemePairOpen $ PairElem e as
- , LexemeToken $ mempty
- , LexemePairClose $ PairElem e [] ]
- False -> [ LexemePairOpen $ PairElem e as
- , LexemeToken $ tokens [TokenPlain ""]
- ])
+ PairElem
<$ P.char '<'
<*> p_Word
<*> p_Attrs
- <*> P.option False (True <$ P.char '/')
<* P.char '>'
-p_ElemClose :: Parser e s [Lexeme]
+p_ElemClose :: Parser e s Pair
p_ElemClose = pdbg "ElemClose" $
- (\e -> [LexemePairClose $ PairElem e []])
+ (`PairElem` [])
<$ P.string "</"
<*> p_Word
<* P.char '>'
+
+{-
+p_ElemOpenOrSingle :: Parser e s Pair
+p_ElemOpenOrSingle =
+ p_ElemOpen >>= \p ->
+ P.char '>' $> LexemePairOpen p <|>
+ P.string "/>" $> LexemePairAny p
+-}