1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..), fromMaybe)
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Text.Show (Show(..))
23 import qualified Data.Char as Char
24 import qualified Data.Sequence as Seq
25 import qualified Data.Text as Text
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Builder as Builder
28 import qualified Text.Megaparsec as P
30 import Language.TCT.Token
31 import Language.TCT.Elem
32 import Language.TCT.Read.Elem -- hiding (pdbg)
34 -- pdbg m p = P.dbg m p
36 textOf :: Buildable a => a -> Text
37 textOf = TL.toStrict . Builder.toLazyText . build
40 type Pairs = (Token,[(Pair,Token)])
42 openPair :: Pair -> Pairs -> Pairs
43 openPair g (t,ms) = (t,(g,mempty):ms)
45 insertToken :: Token -> Pairs -> Pairs
46 insertToken tok (t,[]) = (t<>tok,[])
47 insertToken tok (t,(g0,t0):gs) = (t,(g0,t0<>tok):gs)
49 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
50 closePair :: Pair -> Pairs -> Pairs
51 closePair g (t,[]) = dbg "closePair" $ (t<>TokenPlain (snd $ pairBorders g mempty),[])
52 closePair g (t,(g1,m1):ms) = dbg "closePair" $
54 (PairElem x ax, PairElem y ay) | x == y ->
55 insertToken (TokenPair (PairElem x (ax<>ay)) m1) (t,ms)
56 (x,y) | x == y -> insertToken (TokenPair g1 m1) (t,ms)
60 (closelessPair mempty (g1,m1))
63 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
64 closelessPair :: Token -> (Pair,Token) -> Token
65 closelessPair acc (g,t) = dbg "closelessPair" $
67 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
68 PairHash | TokenPlain p :< toks <- Seq.viewl $ unTokens $ t <> acc ->
69 case Text.findIndex (not . isTagChar) p of
70 Just 0 -> TokenPlain (fst $ pairBorders g mempty) <> t <> acc
71 Just i -> Tokens $ TokenTag tag <| TokenPlain p' <| toks
72 where (tag,p') = Text.splitAt i p
73 Nothing -> Tokens $ TokenTag p <| toks
74 _ -> TokenPlain (fst $ pairBorders g mempty) <> t <> acc
79 case Char.generalCategory c of
80 Char.DashPunctuation -> True
81 Char.ConnectorPunctuation -> True
84 -- | Close remaining 'Pair's at end of parsing.
85 closePairs :: Pairs -> Token
86 closePairs (t0,gs) = dbg "closePairs" $
87 t0 <> foldl' closelessPair mempty gs
92 | LexemePairClose Pair
93 | LexemePunctOrSym Char
101 appendLexeme :: Lexeme -> Pairs -> Pairs
102 appendLexeme lex gs =
104 case dbg "appendLexeme" lex of
105 LexemePairOpen g -> openPair g gs
106 LexemePairClose g -> closePair g gs
107 LexemePunctOrSym c -> insertToken (TokenPlain (Text.singleton c)) gs
108 LexemeWhite wh -> insertToken (TokenPlain wh) gs
109 LexemeWord wo -> insertToken (TokenPlain wo) gs
110 LexemeToken tok -> insertToken tok gs
111 LexemeEscape c -> insertToken (TokenEscape c) gs
112 LexemeLink lnk -> insertToken (TokenLink lnk) gs
114 appendLexemes :: Pairs -> [Lexeme] -> Pairs
115 appendLexemes = foldl' (flip appendLexeme)
119 p_Token :: Parser e s Token
120 p_Token = closePairs <$> p_Pairs (mempty,[])
122 p_Pairs :: Pairs -> Parser e s Pairs
123 p_Pairs gs = pdbg "Pairs" $
124 (p_Lexemes (mempty == gs) >>= p_Pairs . appendLexemes gs) <|>
127 p_Lexemes :: Bool -> Parser e s [Lexeme]
128 p_Lexemes isBOF = pdbg "Lexemes" $
130 [ P.try $ p_PairCloseWhite
131 , P.try $ p_PairWhiteOpen isBOF
132 , P.try $ p_PairCloseBorder
133 , P.try $ p_PairBorderOpen
134 , P.try $ p_PairClose
136 , P.try $ pure . LexemePunctOrSym <$> p_PunctOrSym
137 , P.try $ pure <$> p_White
138 , pure . LexemeWord <$> p_Word
141 p_White :: Parser e s Lexeme
142 p_White = pdbg "White" $ LexemeWhite <$> p_Spaces
144 p_PunctOrSym :: Parser e s Char
145 p_PunctOrSym = P.satisfy $ \c ->
146 Char.isPunctuation c ||
149 p_PairCloseWhite :: Parser e s [Lexeme]
150 p_PairCloseWhite = pdbg "PairCloseWhite" $
151 (\c b -> mconcat c <> b)
152 <$> P.some (P.try p_PairClose <|> pure . LexemePunctOrSym <$> p_PunctOrSym)
153 <*> ((pure <$> p_White) <|> P.eof $> [])
155 p_PairWhiteOpen :: Bool -> Parser e s [Lexeme]
156 p_PairWhiteOpen isBOF = pdbg "PairWhiteOpen" $
157 (\b o -> b <> mconcat o)
158 <$> (if isBOF then return [] else pure <$> p_White)
159 <*> P.some (P.try p_PairOpen <|> pure . LexemePunctOrSym <$> p_PunctOrSym)
161 p_PairCloseBorder :: Parser e s [Lexeme]
162 p_PairCloseBorder = pdbg "PairCloseBorder" $
166 (\c b -> mconcat $ c <> b)
167 <$> P.some (P.try p_PairClose)
168 <*> P.some (P.try $ P.choice
173 case l_PairClose c of
176 case l_PairOpenAndClose LexemePairOpen c <|> l_PairOpen c of
177 Nothing -> return [LexemePunctOrSym c]
181 (\c b -> mconcat c <> [LexemePunctOrSym b])
182 <$> P.some (P.try p_PairClose)
185 p_PairBorderOpen :: Parser e s [Lexeme]
186 p_PairBorderOpen = pdbg "PairBorderOpen" $
190 (\b o -> mconcat $ b <> o)
191 <$> P.some (P.try $ P.choice
196 case l_PairOpen c <|> l_PairClose c of
200 <*> P.some (P.try p_PairOpen)
202 (\b o -> LexemePunctOrSym b : mconcat o)
204 <*> P.some (P.try p_PairOpen)
206 p_PairOpen :: Parser e s [Lexeme]
207 p_PairOpen = pdbg "PairOpen" $ do
210 , P.try (pure <$> p_Escape)
211 , P.try (pure <$> p_Link)
214 case l_PairOpenOrClose LexemePairOpen c of
219 p_PairClose :: Parser e s [Lexeme]
220 p_PairClose = pdbg "PairClose" $ do
224 , P.try (pure <$> p_Escape)
225 , P.try (pure <$> p_Link)
228 case l_PairOpenOrClose LexemePairClose c of
233 p_PairPlain :: (Char -> Maybe Lexeme) -> Parser e s [Lexeme]
234 p_PairPlain pair = pdbg "PairPlain" $ do
235 (<$> p_PunctOrSym) $ \c ->
237 LexemePunctOrSym c `fromMaybe`
240 l_PairOpenAndClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
241 l_PairOpenAndClose lxm c =
243 '/' -> Just $ lxm PairSlash
244 '-' -> Just $ lxm PairDash
245 '"' -> Just $ lxm PairDoublequote
246 '\'' -> Just $ lxm PairSinglequote
247 '`' -> Just $ lxm PairBackquote
248 '_' -> Just $ lxm PairUnderscore
249 '*' -> Just $ lxm PairStar
250 '#' -> Just $ lxm PairHash
253 l_PairOpenOrClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
254 l_PairOpenOrClose lxm c =
255 l_PairOpenAndClose lxm c <|>
259 l_PairOpen :: Char -> Maybe Lexeme
262 '(' -> Just $ LexemePairOpen PairParen
263 '[' -> Just $ LexemePairOpen PairBracket
264 '{' -> Just $ LexemePairOpen PairBrace
265 '«' -> Just $ LexemePairOpen PairFrenchquote
268 l_PairClose :: Char -> Maybe Lexeme
271 ')' -> Just $ LexemePairClose PairParen
272 ']' -> Just $ LexemePairClose PairBracket
273 '}' -> Just $ LexemePairClose PairBrace
274 '»' -> Just $ LexemePairClose PairFrenchquote
277 p_Link :: Parser e s Lexeme
279 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
280 <$> P.option "" (P.try p_scheme)
286 <$> P.some (P.satisfy $ \c ->
304 p_Escape :: Parser e s Lexeme
308 <*> P.satisfy Char.isPrint
310 p_ElemSingle :: Parser e s [Lexeme]
311 p_ElemSingle = pdbg "ElemOpen" $
313 [ LexemePairOpen $ PairElem e as
314 , LexemeToken $ Tokens mempty
315 -- NOTE: encode that it's the same Elem for open and close
316 , LexemePairClose $ PairElem e [] ])
322 p_ElemOpen :: Parser e s [Lexeme]
323 p_ElemOpen = pdbg "ElemOpen" $
326 True -> [ LexemePairOpen $ PairElem e as
327 , LexemeToken $ Tokens mempty
328 , LexemePairClose $ PairElem e [] ]
329 False -> [LexemePairOpen $ PairElem e as])
333 <*> P.option False (True <$ P.char '/')
336 p_ElemClose :: Parser e s [Lexeme]
337 p_ElemClose = pdbg "ElemClose" $
338 (\e -> [LexemePairClose $ PairElem e []])