{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Rank2Types #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
module Language.TCT.Read.Token where
import Control.Applicative (Applicative(..), Alternative(..))
import Control.Monad (Monad(..))
import Data.Bool
import Data.Char (Char)
+import Data.Either (Either(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..))
-import Data.Function (($), (.), flip)
-import Data.Functor ((<$>), ($>), (<$))
+import Data.Function (($), (.))
+import Data.Functor ((<$>), ($>))
+import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (Maybe(..))
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
-import Data.Sequence (ViewL(..), (<|))
-import Data.Text (Text)
-import Data.Text.Buildable (Buildable(..))
+import Data.Sequence (ViewL(..), ViewR(..), (<|))
+import Data.TreeSeq.Strict (Tree(..), Trees)
import Data.Tuple (fst,snd)
+import Data.Void (Void)
+import Prelude (Num(..))
import Text.Show (Show(..))
import qualified Data.Char as Char
+import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Sequence as Seq
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Builder as Builder
import qualified Text.Megaparsec as P
+import qualified Text.Megaparsec.Char as P
-import Language.TCT.Token
-import Language.TCT.Elem -- hiding (dbg)
-import Language.TCT.Read.Elem -- hiding (pdbg)
-
-{-
-import Debug.Trace (trace)
-dbg m x = trace (m <> ": " <> show x) x
-pdbg m p = P.dbg m p
--}
-
-textOf :: Buildable a => a -> Text
-textOf = TL.toStrict . Builder.toLazyText . build
+import Language.TCT.Debug
+import Language.TCT.Cell
+import Language.TCT.Elem
+import Language.TCT.Tree
+import Language.TCT.Read.Elem
+import Language.TCT.Read.Cell
-- * Type 'Pairs'
-type Pairs = (Tokens,[(Pair,Tokens)])
-
-openPair :: Pair -> Pairs -> Pairs
-openPair g (t,ms) = (t,(g,mempty):ms)
-
-insertToken :: Token -> Pairs -> Pairs
-insertToken tok (t,[]) = (t<>tokens [tok],[])
-insertToken tok (t,(p0,t0):ps) = (t,(p0,t0<>tokens [tok]):ps)
-
-insertTokens :: Tokens -> Pairs -> Pairs
-insertTokens toks (t,[]) = (t<>toks,[])
-insertTokens toks (t,(p0,t0):ps) = (t,(p0,t0<>toks):ps)
+-- | Right-only Dyck language,
+-- to keep track of opened 'Pair's.
+type Pairs = (Tokens,[Opening])
+type Tokens = Trees (Cell Node)
+
+-- ** Type 'Opening'
+-- | An opened 'Pair' and its content so far.
+type Opening = (Cell Pair,Tokens)
+
+appendPairsToken :: Pairs -> Tree (Cell Node) -> Pairs
+appendPairsToken ps t = appendPairsTokens ps (pure t)
+
+appendPairsText :: Pairs -> Cell TL.Text -> Pairs
+appendPairsText ps (Cell sp t) =
+ appendPairsToken ps $
+ Tree0 $ Cell sp $
+ NodeToken $ TokenText t
+
+appendPairsTokens :: Pairs -> Tokens -> Pairs
+appendPairsTokens (ts,[]) toks = (ts`unionTokens`toks,[])
+appendPairsTokens (ts,(p0,t0):ps) toks = (ts,(p0,t0`unionTokens`toks):ps)
+
+-- | Unify two 'Tokens', merging border 'TokenText's if any.
+unionTokens :: Tokens -> Tokens -> Tokens
+unionTokens x y =
+ case (Seq.viewr x, Seq.viewl y) of
+ (xs :> x0, y0 :< ys) ->
+ case (x0,y0) of
+ ( Tree (Cell (Span fx bx _ex:| sx) (NodeToken (TokenText tx))) tsx
+ , Tree (Cell (Span _fy _by ey:|_sy) (NodeToken (TokenText ty))) tsy ) ->
+ xs `unionTokens`
+ pure (Tree (Cell (Span fx bx ey:|sx) $ NodeToken $ TokenText $ tx <> ty) (tsx<>tsy)) `unionTokens`
+ ys
+ _ -> x <> y
+ (EmptyR, _) -> y
+ (_, EmptyL) -> x
+
+unionsTokens :: Foldable f => f Tokens -> Tokens
+unionsTokens = foldl' unionTokens mempty
+
+openPair :: Pairs -> Cell Pair -> Pairs
+openPair (t,ps) p = (t,(p,mempty):ps)
-- | Close a 'Pair' when there is a matching 'LexemePairClose'.
-closePair :: Pair -> Pairs -> Pairs
-closePair p (t,[]) = dbg "closePair" $
- (t<>tokens [TokenPlain $ snd $ pairBorders p $ tokens [TokenPlain ""]],[])
-closePair p (t,(p1,t1):ts) = dbg "closePair" $
- case (p,p1) of
- (PairElem x ax, PairElem y ay) | x == y ->
- insertToken (TokenPair (PairElem x (ax<>ay)) t1) (t,ts)
- (x,y) | x == y -> insertToken (TokenPair p1 t1) (t,ts)
+closePair :: Pairs -> Cell Pair -> Pairs
+closePair ps@(_,[]) (Cell ssp p) = -- debug0 "closePair" $
+ appendPairsText ps $ Cell ssp $
+ snd $ pairBordersDouble p
+closePair (t,(cx@(Cell (Span fx bx _ex:| sx) px),t1):ts)
+ cy@(Cell (Span _fy _by ey:|_sy) py) = -- debug0 "closePair" $
+ case (px,py) of
+ (PairElem nx ax, PairElem ny ay) | nx == ny ->
+ appendPairsToken (t,ts) $
+ Tree (Cell (Span fx bx ey:|sx) $ NodePair $ PairElem nx as) t1
+ where as | null ay = ax
+ | otherwise = ax<>ay
+ _ | px == py ->
+ appendPairsToken (t,ts) $
+ Tree (Cell (Span fx bx ey:|sx) $ NodePair px) t1
_ ->
- closePair p $
- insertTokens
- (closeUnpaired mempty (p1,t1))
+ (`closePair` cy) $
+ appendPairsTokens
(t,ts)
+ (closeImpaired mempty (cx,t1))
--- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
-closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens
-closeUnpaired acc (p,tn) = dbg "closeUnpaired" $
- case p of
+-- | Close a 'Pair' when there is no matching 'LexemePairClose'.
+closeImpaired :: Tokens -> (Cell Pair, Tokens) -> Tokens
+closeImpaired acc (Cell ssp@(s0:|sp) pair, toks) = -- debug0 "closeImpaired" $
+ case pair of
-- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
- PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ tn <> acc ->
- case Text.findIndex (not . isTagChar) t of
- Just 0 -> tokens [TokenPlain $ fst $ pairBorders p mempty] <> tn <> acc
- Just i -> Tokens $ TokenTag tag <| TokenPlain t' <| ts
- where (tag,t') = Text.splitAt i t
- Nothing -> Tokens $ TokenTag t <| ts
- _ -> tokens [TokenPlain $ fst $ pairBorders p $ tokens [TokenPlain ""]] <> tn <> acc
+ PairHash | Just (Cell (Span{span_end}:|_st) tag, rest) <- tagFrom body ->
+ Tree0 (Cell (s0{span_end}:|sp) $ NodeToken $ TokenTag tag) <| rest
+ -- NOTE: use bp (not bt) to include the '#'
+ _ -> pure open `unionTokens` body
where
- isTagChar c =
- Char.isAlphaNum c ||
- c=='·' ||
- case Char.generalCategory c of
- Char.DashPunctuation -> True
- Char.ConnectorPunctuation -> True
- _ -> False
+ body = toks `unionTokens` acc
+ open = Tree0 $ Cell ssp $ NodeToken $ TokenText $ fst $ pairBordersDouble pair
-- | Close remaining 'Pair's at end of parsing.
closePairs :: Pairs -> Tokens
-closePairs (t0,ps) = dbg "closePairs" $
- t0 <> foldl' closeUnpaired mempty ps
-
--- * Type 'Lexeme'
-data Lexeme
- = LexemePairOpen Pair
- | LexemePairClose Pair
- | LexemePunctOrSym Char
- | LexemeWhite Text
- | LexemeWord Text
- | LexemeToken Tokens
- | LexemeEscape Char
- | LexemeLink Text
- deriving (Show, Eq)
+closePairs (t0,ps) = -- debug0 "closePairs" $
+ t0 `unionTokens` foldl' closeImpaired mempty ps
appendLexeme :: Lexeme -> Pairs -> Pairs
-appendLexeme lex ps =
- dbg "appendLexeme" $
- case dbg "appendLexeme" lex of
- LexemePairOpen p -> openPair p ps
- LexemePairClose p -> closePair p ps
- LexemePunctOrSym c -> insertToken (TokenPlain (Text.singleton c)) ps
- LexemeWhite wh -> insertToken (TokenPlain wh) ps
- LexemeWord wo -> insertToken (TokenPlain wo) ps
- LexemeToken ts -> insertTokens ts ps
- LexemeEscape c -> insertToken (TokenEscape c) ps
- LexemeLink lnk -> insertToken (TokenLink lnk) ps
+appendLexeme lex acc = -- debug2 "appendLexeme" "lex" "acc" $ \lex acc ->
+ case lex of
+ LexemePairOpen ps -> foldl' open acc ps
+ where
+ -- NOTE: insert an empty node to encode <elem></elem>, not <elem/>
+ open a p@(Cell (Span{span_end, ..}:|sp) PairElem{}) = openPair a p `appendPairsText` Cell (Span{span_begin=span_end, ..}:|sp) ""
+ open a p = openPair a p
+ LexemePairClose ps -> foldl' closePair acc ps
+ LexemePairAny ps -> foldl' openPair acc ps
+ {-
+ LexemePairAny ps ->
+ appendPairsText acc $ sconcat $
+ ((fst . pairBordersSingle) <$>) <$> ps
+ -}
+ LexemePairBoth ps -> appendPairsTokens acc $ Seq.fromList $ toList $ Tree0 . (NodePair <$>) <$> ps
+ LexemeEscape c -> appendPairsToken acc $ Tree0 $ NodeToken . TokenEscape <$> c
+ LexemeLink t -> appendPairsToken acc $ Tree0 $ NodeToken . TokenLink <$> t
+ {-LexemeWhite (unCell -> "") -> acc-}
+ -- LexemeWhite (unCell -> Text.all (==' ') -> True) -> acc
+ LexemeWhite t -> appendPairsText acc t
+ LexemeAlphaNum t -> appendPairsText acc t
+ LexemeOther t -> appendPairsText acc t
+ LexemeTree t -> appendPairsToken acc t
+ LexemeEnd -> acc
appendLexemes :: Pairs -> [Lexeme] -> Pairs
-appendLexemes = foldl' (flip appendLexeme)
-
--- * Parsers
-
-p_Tokens :: Parser e s Tokens
-p_Tokens = closePairs <$> p_Pairs (mempty,[])
-
-p_Pairs :: Pairs -> Parser e s Pairs
-p_Pairs gs = pdbg "Pairs" $
- (p_Lexemes (mempty == gs) >>= p_Pairs . appendLexemes gs) <|>
- (P.eof $> gs)
-
-p_Lexemes :: Bool -> Parser e s [Lexeme]
-p_Lexemes isBOF = pdbg "Lexemes" $
- P.choice
- [ P.try $ p_PairCloseWhite
- , P.try $ p_PairWhiteOpen isBOF
- , P.try $ p_PairCloseBorder
- , P.try $ p_PairBorderOpen
- , P.try $ p_PairClose
- , P.try $ p_PairOpen
- , P.try $ pure . LexemePunctOrSym <$> p_PunctOrSym
- , P.try $ pure <$> p_White
- , pure . LexemeWord <$> p_Word
- ]
+appendLexemes = foldr appendLexeme
-p_White :: Parser e s Lexeme
-p_White = pdbg "White" $ LexemeWhite <$> p_Spaces
-
-p_PunctOrSym :: Parser e s Char
-p_PunctOrSym =
- P.satisfy $ \c ->
- Char.isPunctuation c ||
- Char.isSymbol c
-
-p_PairCloseWhite :: Parser e s [Lexeme]
-p_PairCloseWhite = pdbg "PairCloseWhite" $
- (\c b o -> mconcat c <> b <> mconcat o)
- <$> P.some (P.try $
- P.try p_ElemOpen <|>
- P.try p_ElemClose <|>
- P.try p_PairClose <|>
- pure . LexemePunctOrSym <$> p_PunctOrSym)
- <*> (pure <$> p_White <|> P.eof $> [])
- <*> P.many (P.try $
- P.try p_ElemOpen <|>
- P.try p_ElemClose <|>
- P.try p_PairOpen <|>
- pure . LexemePunctOrSym <$> p_PunctOrSym)
-
-p_PairWhiteOpen :: Bool -> Parser e s [Lexeme]
-p_PairWhiteOpen isBOF = pdbg "PairWhiteOpen" $
- (\b o -> b <> mconcat o)
- <$> (if isBOF then return [] else pure <$> p_White)
- <*> P.some (P.try $
- P.try p_ElemOpen <|>
- P.try p_ElemClose <|>
- P.try p_PairOpen <|>
- pure . LexemePunctOrSym <$> p_PunctOrSym)
-
-p_PairCloseBorder :: Parser e s [Lexeme]
-p_PairCloseBorder = pdbg "PairCloseBorder" $
- P.try p0 <|> p1
- where
- p0 =
- (\c b -> mconcat $ c <> b)
- <$> P.some (P.try p_PairClose)
- <*> P.some (P.try $
- P.choice
- [ P.try p_ElemOpen
- , P.try p_ElemClose
- , do
- c <- p_PunctOrSym
- case l_PairOpen c <|> l_PairClose c of
- Just l -> return [l]
- Nothing -> fail "PairCloseBorder"
- ])
- p1 =
- (\c b -> mconcat c <> [LexemePunctOrSym b])
- <$> P.some (P.try p_PairClose)
- <*> p_PunctOrSym
-
-p_PairBorderOpen :: Parser e s [Lexeme]
-p_PairBorderOpen = pdbg "PairBorderOpen" $
- P.try p0 <|> p1
+-- * Type 'Lexeme'
+-- | 'Lexeme's cut the input in the longest chunks of common semantic,
+-- this enables 'orientLexemePairAny' to work with a more meaningful context.
+data Lexeme
+ = LexemePairOpen !(NonEmpty (Cell Pair))
+ | LexemePairClose !(NonEmpty (Cell Pair))
+ | LexemePairAny !(NonEmpty (Cell Pair))
+ -- ^ orientation depending on the surrounding 'Lexeme's,
+ -- see 'orientLexemePairAny'
+ | LexemePairBoth !(NonEmpty (Cell Pair))
+ | LexemeEscape !(Cell Char)
+ | LexemeLink !(Cell TL.Text)
+ | LexemeWhite !(Cell TL.Text)
+ | LexemeAlphaNum !(Cell TL.Text)
+ | LexemeOther !(Cell TL.Text)
+ | LexemeTree !(Tree (Cell Node))
+ | LexemeEnd
+ deriving (Eq, Show)
+instance Pretty Lexeme
+
+parseTokens :: [Lexeme] -> Tokens
+parseTokens ps =
+ closePairs $
+ appendLexemes mempty $
+ -- debug0 "Lexemes (post orient)" $
+ orientLexemePairAny $ LexemeEnd :
+ ps
+
+parseLexemes ::
+ Cell TL.Text ->
+ Either (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void)) [Lexeme]
+parseLexemes = runParserOnCell (p_Lexemes <* P.eof)
+
+-- | Parse 'Lexeme's, returning them in reverse order
+-- to apply 'orientLexemePairAny'.
+p_Lexemes :: P.Tokens s ~ TL.Text => Parser e s [Lexeme]
+p_Lexemes = debugParser "Lexemes" $ go []
where
- p0 =
- (\b o -> mconcat $ b <> o)
- <$> P.some (P.try $
- P.choice
- [ P.try p_ElemOpen
- , P.try p_ElemClose
- , do
- c <- p_PunctOrSym
- case l_PairOpen c <|> l_PairClose c of
- Just l -> return [l]
- Nothing -> fail "PairBorderOpen"
- ])
- <*> P.some (P.try p_PairOpen)
- p1 =
- (\b o -> LexemePunctOrSym b : mconcat o)
- <$> p_PunctOrSym
- <*> P.some (P.try p_PairOpen)
-
-p_PairOpen :: Parser e s [Lexeme]
-p_PairOpen = pdbg "PairOpen" $ do
- P.choice
- [ P.try p_ElemOpen
- , P.try (pure <$> p_Escape)
- , P.try (pure <$> p_Link)
- , do
- c <- p_PunctOrSym
- case l_PairOpenOrClose LexemePairOpen c of
- Just l -> return [l]
- _ -> fail "PairOpen"
- ]
-
-p_PairClose :: Parser e s [Lexeme]
-p_PairClose = pdbg "PairClose" $ do
+ go :: P.Tokens s ~ TL.Text => [Lexeme] -> Parser e s [Lexeme]
+ go acc =
+ (P.eof $> acc) <|>
+ (p_Lexeme >>= \next -> go $ orientLexemePairAny $ next:acc)
+
+-- | Must be called on parsed 'Lexeme's after the prepending of any new 'Lexeme',
+-- so that it can try to orient nearby 'LexemePairAny'
+-- to 'LexemePairOpen' or 'LexemePairClose'.
+orientLexemePairAny :: [Lexeme] -> [Lexeme]
+orientLexemePairAny = \case
+ -- LexemeOther (Cell _bx ex x):LexemeOther (Cell by _ey y):acc -> LexemeOther (Cell by ex (x<>y)):acc
+
+ -- "
+ t@LexemeTree{}:LexemePairAny p:acc -> t:LexemePairClose p:acc
+ w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
+ LexemeEnd:LexemePairAny p:acc -> LexemePairClose p:acc
+ -- "
+ LexemePairAny p:t@LexemeTree{}:acc -> LexemePairOpen p:t:acc
+ LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
+ LexemePairAny p:[] -> LexemePairOpen p:[]
+
+ -- ,,,"
+ LexemePairAny p:a@LexemeOther{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
+ LexemePairAny p:a@LexemeOther{}:[] -> LexemePairOpen p:a:[]
+ -- ",,,
+ w@LexemeWhite{}:a@LexemeOther{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
+ LexemeEnd:a@LexemeOther{}:LexemePairAny p:acc -> a:LexemePairClose p:acc
+
+ -- ",,,AAA
+ an@LexemeAlphaNum{}:a@LexemeOther{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
+ -- ,,,"AAA
+ an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeOther{}:acc -> an:LexemePairOpen p:a:acc
+
+ -- ")
+ c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
+ -- ("
+ LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
+
+ -- "(
+ o@LexemePairOpen{}:LexemePairAny p:acc -> o:LexemePairClose p:acc
+ -- )"
+ LexemePairAny p:c@LexemePairClose{}:acc -> LexemePairClose p:c:acc
+
+ acc -> acc
+
+p_Lexeme :: P.Tokens s ~ TL.Text => Parser e s Lexeme
+p_Lexeme = debugParser "Lexeme" $
P.choice
- [ P.try p_ElemClose
- , P.try p_ElemSingle
- , P.try (pure <$> p_Escape)
- , P.try (pure <$> p_Link)
- , do
- c <- p_PunctOrSym
- case l_PairOpenOrClose LexemePairClose c of
- Just l -> return [l]
- _ -> fail "PairClose"
+ [ P.try $ LexemeWhite <$> p_Cell p_Spaces1
+ , P.try $ LexemePairAny <$> p_some (p_Cell $ p_satisfyMaybe pairAny)
+ , P.try $ LexemePairBoth <$> p_some (P.try $ p_Cell p_ElemSingle)
+ , P.try $ LexemePairOpen <$> p_some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
+ , P.try $ LexemePairClose <$> p_some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
+ , P.try $ LexemeEscape <$> p_Cell p_Escape
+ , P.try $ LexemeLink <$> p_Cell p_Link
+ , P.try $ LexemeAlphaNum <$> p_Cell (P.takeWhile1P (Just "AlphaNum") Char.isAlphaNum)
+ , LexemeOther <$> p_Cell (TL.singleton <$> P.anyChar)
]
-l_PairOpenOrClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
-l_PairOpenOrClose lxm c =
- l_PairOpenAndClose lxm c <|>
- l_PairOpen c <|>
- l_PairClose c
-
-l_PairOpenAndClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
-l_PairOpenAndClose lxm c =
- case c of
- '/' -> Just $ lxm PairSlash
- '-' -> Just $ lxm PairDash
- '"' -> Just $ lxm PairDoublequote
- '\'' -> Just $ lxm PairSinglequote
- '`' -> Just $ lxm PairBackquote
- '_' -> Just $ lxm PairUnderscore
- '*' -> Just $ lxm PairStar
- '#' -> Just $ lxm PairHash
- _ -> Nothing
-
-l_PairOpen :: Char -> Maybe Lexeme
-l_PairOpen c =
- case c of
- '(' -> Just $ LexemePairOpen PairParen
- '[' -> Just $ LexemePairOpen PairBracket
- '{' -> Just $ LexemePairOpen PairBrace
- '«' -> Just $ LexemePairOpen PairFrenchquote
- _ -> Nothing
-
-l_PairClose :: Char -> Maybe Lexeme
-l_PairClose c =
- case c of
- ')' -> Just $ LexemePairClose PairParen
- ']' -> Just $ LexemePairClose PairBracket
- '}' -> Just $ LexemePairClose PairBrace
- '»' -> Just $ LexemePairClose PairFrenchquote
- _ -> Nothing
-
-p_Link :: Parser e s Lexeme
+p_some :: Parser e s a -> Parser e s (NonEmpty a)
+p_some p = NonEmpty.fromList <$> P.some p
+
+pairAny :: Char -> Maybe Pair
+pairAny = \case
+ '-' -> Just PairDash
+ '/' -> Just PairSlash
+ '"' -> Just PairDoublequote
+ '\'' -> Just PairSinglequote
+ '`' -> Just PairBackquote
+ '_' -> Just PairUnderscore
+ '*' -> Just PairStar
+ '#' -> Just PairHash
+ _ -> Nothing
+
+pairOpen :: Char -> Maybe Pair
+pairOpen = \case
+ '(' -> Just PairParen
+ '[' -> Just PairBracket
+ '{' -> Just PairBrace
+ '«' -> Just PairFrenchquote
+ _ -> Nothing
+
+pairClose :: Char -> Maybe Pair
+pairClose = \case
+ ')' -> Just PairParen
+ ']' -> Just PairBracket
+ '}' -> Just PairBrace
+ '»' -> Just PairFrenchquote
+ _ -> Nothing
+
+p_Escape :: Parser e s Char
+p_Escape = P.char '\\' *> P.satisfy Char.isPrint
+
+p_Link :: P.Tokens s ~ TL.Text => Parser e s TL.Text
p_Link =
- (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
- <$> P.option "" (P.try p_scheme)
- <*> P.string "//"
- <*> p_addr
+ P.try (P.char '<' *> p <* P.char '>') <|>
+ p
where
+ p :: P.Tokens s ~ TL.Text => Parser e s TL.Text
+ p =
+ (\scheme addr -> scheme <> "//" <> addr)
+ <$> P.option "" (P.try p_scheme)
+ <* P.string "//"
+ <*> p_addr
+ p_scheme :: P.Tokens s ~ TL.Text => Parser e s TL.Text
p_scheme =
- (<>)
- <$> P.some (P.satisfy $ \c ->
+ (<> ":")
+ <$> (P.takeWhile1P (Just "scheme") $ \c ->
Char.isAlphaNum c
|| c=='_'
|| c=='-'
|| c=='+')
- <*> P.string ":"
+ <* P.char ':'
+ p_addr :: P.Tokens s ~ TL.Text => Parser e s TL.Text
p_addr =
- P.many $
- P.satisfy $ \c ->
- Char.isAlphaNum c
- || c=='%'
- || c=='/'
- || c=='('
- || c==')'
- || c=='-'
- || c=='_'
- || c=='.'
-
-p_Escape :: Parser e s Lexeme
-p_Escape =
- LexemeEscape
- <$ P.char '\\'
- <*> P.satisfy Char.isPrint
-
-p_ElemSingle :: Parser e s [Lexeme]
-p_ElemSingle = pdbg "ElemSingle" $
- (\e as ->
- [ LexemePairOpen $ PairElem e as
- , LexemeToken $ mempty
- , LexemePairClose $ PairElem e [] ])
- <$ P.char '<'
- <*> p_Word
- <*> p_Attrs
- <* P.string "/>"
-
-p_ElemOpen :: Parser e s [Lexeme]
-p_ElemOpen = pdbg "ElemOpen" $
- (\e as oc ->
- case oc of
- True -> [ LexemePairOpen $ PairElem e as
- , LexemeToken $ mempty
- , LexemePairClose $ PairElem e [] ]
- False -> [ LexemePairOpen $ PairElem e as
- , LexemeToken $ tokens [TokenPlain ""]
- ])
- <$ P.char '<'
- <*> p_Word
- <*> p_Attrs
- <*> P.option False (True <$ P.char '/')
- <* P.char '>'
-
-p_ElemClose :: Parser e s [Lexeme]
-p_ElemClose = pdbg "ElemClose" $
- (\e -> [LexemePairClose $ PairElem e []])
- <$ P.string "</"
- <*> p_Word
- <* P.char '>'
+ P.takeWhileP (Just "addr") $ \c ->
+ Char.isAlphaNum c
+ || c=='%'
+ || c=='/'
+ || c=='('
+ || c==')'
+ || c=='-'
+ || c=='_'
+ || c=='.'
+ || c=='#'
+ || c=='?'
+ || c=='='
+
+pairBorders :: Foldable f => Pair -> f a -> (TL.Text,TL.Text)
+pairBorders p ts | null ts = pairBordersSingle p
+ | otherwise = pairBordersDouble p
+
+pairBordersSingle :: Pair -> (TL.Text,TL.Text)
+pairBordersSingle = \case
+ PairElem n as ->
+ ("<"<>n<>foldMap f as<>"/>","")
+ where f (elemAttr_white,ElemAttr{..}) =
+ elemAttr_white <>
+ elemAttr_name <>
+ elemAttr_open <>
+ elemAttr_value <>
+ elemAttr_close
+ p -> pairBordersDouble p
+
+pairBordersDouble :: Pair -> (TL.Text,TL.Text)
+pairBordersDouble = \case
+ PairElem n as -> ("<"<>n<>foldMap f as<>">","</"<>n<>">")
+ where f (elemAttr_white,ElemAttr{..}) =
+ elemAttr_white <>
+ elemAttr_name <>
+ elemAttr_open <>
+ elemAttr_value <>
+ elemAttr_close
+ PairHash -> ("#","#")
+ PairStar -> ("*","*")
+ PairSlash -> ("/","/")
+ PairUnderscore -> ("_","_")
+ PairDash -> ("-","-")
+ PairBackquote -> ("`","`")
+ PairSinglequote -> ("'","'")
+ PairDoublequote -> ("\"","\"")
+ PairFrenchquote -> ("«","»")
+ PairParen -> ("(",")")
+ PairBrace -> ("{","}")
+ PairBracket -> ("[","]")
+
+-- * Class 'TagFrom'
+class TagFrom a where
+ tagFrom :: a -> Maybe (Cell Tag, a)
+instance TagFrom Tokens where
+ tagFrom ts =
+ case Seq.viewl ts of
+ EmptyL -> Nothing
+ Tree0 (Cell ss0@(Span _f0 _b0 e0:|_s0) n) :< ns ->
+ case n of
+ NodeToken (TokenText t) ->
+ case tagFrom $ Cell ss0 t of
+ Nothing -> Nothing
+ Just (t0,r0) ->
+ if TL.null $ unCell r0
+ then
+ case tagFrom ns of
+ Just (t1@(Cell (Span _f1 b1 _e1:|_s1) _), r1) | e0 == b1 ->
+ Just (t0<>t1, r1)
+ _ -> Just (t0, ns)
+ else Just (t0, pure n0 `unionTokens` ns)
+ where n0 = Tree0 $ NodeToken . TokenText <$> r0
+ _ -> Nothing
+ _ -> Nothing
+instance TagFrom (Cell TL.Text) where
+ tagFrom (Cell (Span fp bp ep:|sp) t)
+ | (w,r) <- TL.span isTagChar t
+ , not $ TL.null w
+ , ew <- pos_column bp + sum (Text.length <$> TL.toChunks w) =
+ Just
+ ( Cell (Span fp bp bp{pos_column=ew}:|sp) w
+ , Cell (Span fp bp{pos_column=ew} ep:|sp) r )
+ tagFrom _ = Nothing
+
+isTagChar :: Char -> Bool
+isTagChar c =
+ Char.isAlphaNum c ||
+ c=='·' ||
+ case Char.generalCategory c of
+ Char.DashPunctuation -> True
+ Char.ConnectorPunctuation -> True
+ _ -> False
+
+{-
+-- | Build 'Tokens' from many 'Token's.
+tokens :: [Cell Token] -> Tokens
+tokens ts = Seq.fromList $ Tree0 . (NodeToken <$>) <$> ts
+
+-- | Build 'Tokens' from one 'Token'.
+tokens1 :: Tree (Cell Node) -> Tokens
+tokens1 = Seq.singleton
+
+unTokenElem :: Tokens -> Maybe (Cell (ElemName,ElemAttrs,Tokens))
+unTokenElem toks =
+ case toList $ {-Seq.dropWhileR isTokenWhite-} toks of
+ [Tree (Cell bp ep (NodePair (PairElem e as))) ts] -> Just (Cell bp ep (e,as,ts))
+ _ -> Nothing
+
+isTokenElem :: Tokens -> Bool
+isTokenElem toks =
+ case toList $ {-Seq.dropWhileR isTokenWhite-} toks of
+ [Tree (unCell -> NodePair PairElem{}) _] -> True
+ _ -> False
+-}