1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..))
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Text.Show (Show(..))
23 import qualified Data.Char as Char
24 import qualified Data.Sequence as Seq
25 import qualified Data.Text as Text
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Builder as Builder
28 import qualified Text.Megaparsec as P
30 import Language.TCT.Token
31 import Language.TCT.Elem -- hiding (dbg)
32 import Language.TCT.Read.Elem -- hiding (pdbg)
35 import Debug.Trace (trace)
36 dbg m x = trace (m <> ": " <> show x) x
40 textOf :: Buildable a => a -> Text
41 textOf = TL.toStrict . Builder.toLazyText . build
44 type Pairs = (Tokens,[(Pair,Tokens)])
46 openPair :: Pair -> Pairs -> Pairs
47 openPair g (t,ms) = (t,(g,mempty):ms)
49 insertToken :: Token -> Pairs -> Pairs
50 insertToken tok (t,[]) = (t<>tokens [tok],[])
51 insertToken tok (t,(p0,t0):ps) = (t,(p0,t0<>tokens [tok]):ps)
53 insertTokens :: Tokens -> Pairs -> Pairs
54 insertTokens toks (t,[]) = (t<>toks,[])
55 insertTokens toks (t,(p0,t0):ps) = (t,(p0,t0<>toks):ps)
57 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
58 closePair :: Pair -> Pairs -> Pairs
59 closePair p (t,[]) = dbg "closePair" $
60 (t<>tokens [TokenPlain $ snd $ pairBorders p mempty],[])
61 closePair p (t,(p1,t1):ts) = dbg "closePair" $
63 (PairElem x ax, PairElem y ay) | x == y ->
64 insertToken (TokenPair (PairElem x (ax<>ay)) t1) (t,ts)
65 (x,y) | x == y -> insertToken (TokenPair p1 t1) (t,ts)
69 (closeUnpaired mempty (p1,t1))
72 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
73 closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens
74 closeUnpaired acc (p,tn) = dbg "closeUnpaired" $
76 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
77 PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ tn <> acc ->
78 case Text.findIndex (not . isTagChar) t of
79 Just 0 -> tokens [TokenPlain $ fst $ pairBorders p mempty] <> tn <> acc
80 Just i -> Tokens $ TokenTag tag <| TokenPlain t' <| ts
81 where (tag,t') = Text.splitAt i t
82 Nothing -> Tokens $ TokenTag t <| ts
83 _ -> tokens [TokenPlain $ fst $ pairBorders p $ tokens [TokenPlain ""]] <> tn <> acc
88 case Char.generalCategory c of
89 Char.DashPunctuation -> True
90 Char.ConnectorPunctuation -> True
93 -- | Close remaining 'Pair's at end of parsing.
94 closePairs :: Pairs -> Tokens
95 closePairs (t0,ps) = dbg "closePairs" $
96 t0 <> foldl' closeUnpaired mempty ps
100 = LexemePairOpen Pair
101 | LexemePairClose Pair
102 | LexemePunctOrSym Char
110 appendLexeme :: Lexeme -> Pairs -> Pairs
111 appendLexeme lex ps =
113 case dbg "appendLexeme" lex of
114 LexemePairOpen p -> openPair p ps
115 LexemePairClose p -> closePair p ps
116 LexemePunctOrSym c -> insertToken (TokenPlain (Text.singleton c)) ps
117 LexemeWhite wh -> insertToken (TokenPlain wh) ps
118 LexemeWord wo -> insertToken (TokenPlain wo) ps
119 LexemeToken ts -> insertTokens ts ps
120 LexemeEscape c -> insertToken (TokenEscape c) ps
121 LexemeLink lnk -> insertToken (TokenLink lnk) ps
123 appendLexemes :: Pairs -> [Lexeme] -> Pairs
124 appendLexemes = foldl' (flip appendLexeme)
128 p_Tokens :: Parser e s Tokens
129 p_Tokens = closePairs <$> p_Pairs (mempty,[])
131 p_Pairs :: Pairs -> Parser e s Pairs
132 p_Pairs gs = pdbg "Pairs" $
133 (p_Lexemes (mempty == gs) >>= p_Pairs . appendLexemes gs) <|>
136 p_Lexemes :: Bool -> Parser e s [Lexeme]
137 p_Lexemes isBOF = pdbg "Lexemes" $
139 [ P.try $ p_PairCloseWhite
140 , P.try $ p_PairWhiteOpen isBOF
141 , P.try $ p_PairCloseBorder
142 , P.try $ p_PairBorderOpen
143 , P.try $ p_PairClose
145 , P.try $ pure . LexemePunctOrSym <$> p_PunctOrSym
146 , P.try $ pure <$> p_White
147 , pure . LexemeWord <$> p_Word
150 p_White :: Parser e s Lexeme
151 p_White = pdbg "White" $ LexemeWhite <$> p_Spaces
153 p_PunctOrSym :: Parser e s Char
156 Char.isPunctuation c ||
159 p_PairCloseWhite :: Parser e s [Lexeme]
160 p_PairCloseWhite = pdbg "PairCloseWhite" $
161 (\c b o -> mconcat c <> b <> mconcat o)
164 P.try p_ElemClose <|>
165 P.try p_PairClose <|>
166 pure . LexemePunctOrSym <$> p_PunctOrSym)
167 <*> (pure <$> p_White <|> P.eof $> [])
170 P.try p_ElemClose <|>
172 pure . LexemePunctOrSym <$> p_PunctOrSym)
174 p_PairWhiteOpen :: Bool -> Parser e s [Lexeme]
175 p_PairWhiteOpen isBOF = pdbg "PairWhiteOpen" $
176 (\b o -> b <> mconcat o)
177 <$> (if isBOF then return [] else pure <$> p_White)
180 P.try p_ElemClose <|>
182 pure . LexemePunctOrSym <$> p_PunctOrSym
185 p_PairCloseBorder :: Parser e s [Lexeme]
186 p_PairCloseBorder = pdbg "PairCloseBorder" $
189 p0 = pdbg "PairCloseBorder/p0" $
190 (\c b -> mconcat $ c <> b)
191 <$> P.some (P.try p_PairClose)
198 case l_PairOpen c <|> l_PairClose c of
200 Nothing -> fail "PairCloseBorder"
202 p1 = pdbg "PairCloseBorder/p1" $
203 (\c b -> mconcat c <> [LexemePunctOrSym b])
204 <$> P.some (P.try p_PairClose)
207 p_PairBorderOpen :: Parser e s [Lexeme]
208 p_PairBorderOpen = pdbg "PairBorderOpen" $
211 p0 = pdbg "PairBorderOpen/p0" $
212 (\b o -> mconcat $ b <> o)
219 case l_PairOpen c <|> l_PairClose c of
221 Nothing -> fail "PairBorderOpen"
223 <*> P.some (P.try p_PairOpen)
224 p1 = pdbg "PairBorderOpen/p1" $
225 (\b o -> LexemePunctOrSym b : mconcat o)
227 <*> P.some (P.try p_PairOpen)
229 p_PairOpen :: Parser e s [Lexeme]
230 p_PairOpen = pdbg "PairOpen" $ do
233 , P.try (pure <$> p_Escape)
234 , P.try (pure <$> p_Link)
237 case l_PairOpenOrClose LexemePairOpen c of
242 p_PairClose :: Parser e s [Lexeme]
243 p_PairClose = pdbg "PairClose" $ do
247 , P.try (pure <$> p_Escape)
248 , P.try (pure <$> p_Link)
251 case l_PairOpenOrClose LexemePairClose c of
253 _ -> fail "PairClose"
256 l_PairOpenOrClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
257 l_PairOpenOrClose lxm c =
258 l_PairOpenAndClose lxm c <|>
262 l_PairOpenAndClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
263 l_PairOpenAndClose lxm c =
265 '/' -> Just $ lxm PairSlash
266 '-' -> Just $ lxm PairDash
267 '"' -> Just $ lxm PairDoublequote
268 '\'' -> Just $ lxm PairSinglequote
269 '`' -> Just $ lxm PairBackquote
270 '_' -> Just $ lxm PairUnderscore
271 '*' -> Just $ lxm PairStar
272 '#' -> Just $ lxm PairHash
275 l_PairOpen :: Char -> Maybe Lexeme
278 '(' -> Just $ LexemePairOpen PairParen
279 '[' -> Just $ LexemePairOpen PairBracket
280 '{' -> Just $ LexemePairOpen PairBrace
281 '«' -> Just $ LexemePairOpen PairFrenchquote
284 l_PairClose :: Char -> Maybe Lexeme
287 ')' -> Just $ LexemePairClose PairParen
288 ']' -> Just $ LexemePairClose PairBracket
289 '}' -> Just $ LexemePairClose PairBrace
290 '»' -> Just $ LexemePairClose PairFrenchquote
293 p_Link :: Parser e s Lexeme
295 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
296 <$> P.option "" (P.try p_scheme)
302 <$> P.some (P.try $ P.satisfy $ \c ->
320 p_Escape :: Parser e s Lexeme
324 <*> P.satisfy Char.isPrint
326 p_ElemSingle :: Parser e s [Lexeme]
327 p_ElemSingle = pdbg "ElemSingle" $
329 [ LexemePairOpen $ PairElem e as
330 , LexemeToken $ mempty
331 , LexemePairClose $ PairElem e [] ])
337 p_ElemOpen :: Parser e s [Lexeme]
338 p_ElemOpen = pdbg "ElemOpen" $
341 True -> [ LexemePairOpen $ PairElem e as
342 , LexemeToken $ mempty
343 , LexemePairClose $ PairElem e [] ]
344 False -> [ LexemePairOpen $ PairElem e as
345 , LexemeToken $ tokens [TokenPlain ""]
350 <*> P.option False (True <$ P.char '/')
353 p_ElemClose :: Parser e s [Lexeme]
354 p_ElemClose = pdbg "ElemClose" $
355 (\e -> [LexemePairClose $ PairElem e []])