1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..))
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Text.Show (Show(..))
23 import qualified Data.Char as Char
24 import qualified Data.Sequence as Seq
25 import qualified Data.Text as Text
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Builder as Builder
28 import qualified Text.Megaparsec as P
30 import Language.TCT.Tree (int64)
31 import Language.TCT.Token
32 import Language.TCT.Elem -- hiding (dbg)
33 import Language.TCT.Read.Elem -- hiding (pdbg)
36 import Debug.Trace (trace)
37 dbg m x = trace (m <> ": " <> show x) x
41 textOf :: Buildable a => a -> Text
42 textOf = TL.toStrict . Builder.toLazyText . build
45 type Pairs = (Tokens,[(Pair,Tokens)])
47 openPair :: Pair -> Pairs -> Pairs
48 openPair g (t,ms) = (t,(g,mempty):ms)
50 insertToken :: Token -> Pairs -> Pairs
51 insertToken tok (t,[]) = (t<>tokens [tok],[])
52 insertToken tok (t,(p0,t0):ps) = (t,(p0,t0<>tokens [tok]):ps)
54 insertTokens :: Tokens -> Pairs -> Pairs
55 insertTokens toks (t,[]) = (t<>toks,[])
56 insertTokens toks (t,(p0,t0):ps) = (t,(p0,t0<>toks):ps)
58 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
59 closePair :: Pair -> Pairs -> Pairs
60 closePair p (t,[]) = dbg "closePair" $
61 (t<>tokens [TokenPlain $ snd $ pairBorders p $ tokens [TokenPlain ""]],[])
62 closePair p (t,(p1,t1):ts) = dbg "closePair" $
64 (PairElem x ax, PairElem y ay) | x == y ->
65 insertToken (TokenPair (PairElem x (ax<>ay)) t1) (t,ts)
66 (x,y) | x == y -> insertToken (TokenPair p1 t1) (t,ts)
70 (closeUnpaired mempty (p1,t1))
73 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
74 closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens
75 closeUnpaired acc (p,tn) = dbg "closeUnpaired" $
77 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
78 PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ tn <> acc ->
79 case Text.findIndex (not . isTagChar) (TL.toStrict t) of
80 Just 0 -> tokens [TokenPlain $ fst $ pairBorders p mempty] <> tn <> acc
81 Just i -> Tokens $ TokenTag (TL.toStrict tag) <| TokenPlain t' <| ts
82 where (tag,t') = TL.splitAt (int64 i) t
83 Nothing -> Tokens $ TokenTag (TL.toStrict t) <| ts
84 _ -> tokens [TokenPlain $ fst $ pairBorders p $ tokens [TokenPlain ""]] <> tn <> acc
89 case Char.generalCategory c of
90 Char.DashPunctuation -> True
91 Char.ConnectorPunctuation -> True
94 -- | Close remaining 'Pair's at end of parsing.
95 closePairs :: Pairs -> Tokens
96 closePairs (t0,ps) = dbg "closePairs" $
97 t0 <> foldl' closeUnpaired mempty ps
101 = LexemePairOpen !Pair
102 | LexemePairClose !Pair
103 | LexemePunctOrSym !Char
105 | LexemeWord !TL.Text
106 | LexemeToken !Tokens
111 appendLexeme :: Lexeme -> Pairs -> Pairs
112 appendLexeme lex ps =
114 case dbg "appendLexeme" lex of
115 LexemePairOpen p -> openPair p ps
116 LexemePairClose p -> closePair p ps
117 LexemePunctOrSym c -> insertToken (TokenPlain (TL.singleton c)) ps
118 LexemeWhite wh -> insertToken (TokenPlain (TL.fromStrict wh)) ps
119 LexemeWord wo -> insertToken (TokenPlain wo) ps
120 LexemeToken ts -> insertTokens ts ps
121 LexemeEscape c -> insertToken (TokenEscape c) ps
122 LexemeLink lnk -> insertToken (TokenLink lnk) ps
124 appendLexemes :: Pairs -> [Lexeme] -> Pairs
125 appendLexemes = foldl' (flip appendLexeme)
129 p_Tokens :: Parser e s Tokens
130 p_Tokens = closePairs <$> p_Pairs (mempty,[])
132 p_Pairs :: Pairs -> Parser e s Pairs
133 p_Pairs gs = pdbg "Pairs" $
134 (p_Lexemes (mempty == gs) >>= p_Pairs . appendLexemes gs) <|>
137 p_Lexemes :: Bool -> Parser e s [Lexeme]
138 p_Lexemes isBOF = pdbg "Lexemes" $
140 [ P.try $ p_PairCloseWhite
141 , P.try $ p_PairWhiteOpen isBOF
142 , P.try $ p_PairCloseBorder
143 , P.try $ p_PairBorderOpen
144 , P.try $ p_PairClose
146 , P.try $ pure . LexemePunctOrSym <$> p_PunctOrSym
147 , P.try $ pure <$> p_White
148 , pure . LexemeWord <$> p_Word
151 p_White :: Parser e s Lexeme
152 p_White = pdbg "White" $ LexemeWhite <$> p_Spaces
154 p_PunctOrSym :: Parser e s Char
157 Char.isPunctuation c ||
160 p_PairCloseWhite :: Parser e s [Lexeme]
161 p_PairCloseWhite = pdbg "PairCloseWhite" $
162 (\c b o -> mconcat c <> b <> mconcat o)
165 P.try p_ElemClose <|>
166 P.try p_PairClose <|>
167 pure . LexemePunctOrSym <$> p_PunctOrSym)
168 <*> (pure <$> p_White <|> P.eof $> [])
171 P.try p_ElemClose <|>
173 pure . LexemePunctOrSym <$> p_PunctOrSym)
175 p_PairWhiteOpen :: Bool -> Parser e s [Lexeme]
176 p_PairWhiteOpen isBOF = pdbg "PairWhiteOpen" $
177 (\b o -> b <> mconcat o)
178 <$> (if isBOF then return [] else pure <$> p_White)
181 P.try p_ElemClose <|>
183 pure . LexemePunctOrSym <$> p_PunctOrSym)
185 p_PairCloseBorder :: Parser e s [Lexeme]
186 p_PairCloseBorder = pdbg "PairCloseBorder" $
190 (\c b -> mconcat $ c <> b)
191 <$> P.some (P.try p_PairClose)
198 case l_PairOpen c <|> l_PairClose c of
200 Nothing -> fail "PairCloseBorder"
203 (\c b -> mconcat c <> [LexemePunctOrSym b])
204 <$> P.some (P.try p_PairClose)
207 p_PairBorderOpen :: Parser e s [Lexeme]
208 p_PairBorderOpen = pdbg "PairBorderOpen" $
212 (\b o -> mconcat $ b <> o)
219 case l_PairOpen c <|> l_PairClose c of
221 Nothing -> fail "PairBorderOpen"
223 <*> P.some (P.try p_PairOpen)
225 (\b o -> LexemePunctOrSym b : mconcat o)
227 <*> P.some (P.try p_PairOpen)
229 p_PairOpen :: Parser e s [Lexeme]
230 p_PairOpen = pdbg "PairOpen" $ do
233 , P.try (pure <$> p_Escape)
234 , P.try (pure <$> p_Link)
237 case l_PairOpenOrClose LexemePairOpen c of
242 p_PairClose :: Parser e s [Lexeme]
243 p_PairClose = pdbg "PairClose" $ do
247 , P.try (pure <$> p_Escape)
248 , P.try (pure <$> p_Link)
251 case l_PairOpenOrClose LexemePairClose c of
253 _ -> fail "PairClose"
256 l_PairOpenOrClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
257 l_PairOpenOrClose lxm c =
258 l_PairOpenAndClose lxm c <|>
262 l_PairOpenAndClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
263 l_PairOpenAndClose lxm c =
265 '/' -> Just $ lxm PairSlash
266 '-' -> Just $ lxm PairDash
267 '"' -> Just $ lxm PairDoublequote
268 '\'' -> Just $ lxm PairSinglequote
269 '`' -> Just $ lxm PairBackquote
270 '_' -> Just $ lxm PairUnderscore
271 '*' -> Just $ lxm PairStar
272 '#' -> Just $ lxm PairHash
275 l_PairOpen :: Char -> Maybe Lexeme
278 '(' -> Just $ LexemePairOpen PairParen
279 '[' -> Just $ LexemePairOpen PairBracket
280 '{' -> Just $ LexemePairOpen PairBrace
281 '«' -> Just $ LexemePairOpen PairFrenchquote
284 l_PairClose :: Char -> Maybe Lexeme
287 ')' -> Just $ LexemePairClose PairParen
288 ']' -> Just $ LexemePairClose PairBracket
289 '}' -> Just $ LexemePairClose PairBrace
290 '»' -> Just $ LexemePairClose PairFrenchquote
293 p_Link :: Parser e s Lexeme
295 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
296 <$> P.option "" (P.try p_scheme)
302 <$> P.some (P.satisfy $ \c ->
323 p_Escape :: Parser e s Lexeme
327 <*> P.satisfy Char.isPrint
329 p_ElemSingle :: Parser e s [Lexeme]
330 p_ElemSingle = pdbg "ElemSingle" $
331 (\(TL.toStrict -> e) as ->
332 [ LexemePairOpen $ PairElem e as
333 , LexemeToken $ mempty
334 , LexemePairClose $ PairElem e [] ])
340 p_ElemOpen :: Parser e s [Lexeme]
341 p_ElemOpen = pdbg "ElemOpen" $
342 (\(TL.toStrict -> e) as oc ->
344 True -> [ LexemePairOpen $ PairElem e as
345 , LexemeToken $ mempty
346 , LexemePairClose $ PairElem e [] ]
347 False -> [ LexemePairOpen $ PairElem e as
348 , LexemeToken $ tokens [TokenPlain ""]
353 <*> P.option False (True <$ P.char '/')
356 p_ElemClose :: Parser e s [Lexeme]
357 p_ElemClose = pdbg "ElemClose" $
358 (\(TL.toStrict -> e) -> [LexemePairClose $ PairElem e []])