1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..))
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Text.Show (Show(..))
23 import qualified Data.Char as Char
24 import qualified Data.Sequence as Seq
25 import qualified Data.Text as Text
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Builder as Builder
28 import qualified Text.Megaparsec as P
30 import Language.TCT.Token
31 import Language.TCT.Elem -- hiding (dbg)
32 import Language.TCT.Read.Elem -- hiding (pdbg)
35 import Debug.Trace (trace)
36 dbg m x = trace (m <> ": " <> show x) x
40 textOf :: Buildable a => a -> Text
41 textOf = TL.toStrict . Builder.toLazyText . build
44 type Pairs = (Tokens,[(Pair,Tokens)])
46 openPair :: Pair -> Pairs -> Pairs
47 openPair g (t,ms) = (t,(g,mempty):ms)
49 insertToken :: Token -> Pairs -> Pairs
50 insertToken tok (t,[]) = (t<>tokens [tok],[])
51 insertToken tok (t,(p0,t0):ps) = (t,(p0,t0<>tokens [tok]):ps)
53 insertTokens :: Tokens -> Pairs -> Pairs
54 insertTokens toks (t,[]) = (t<>toks,[])
55 insertTokens toks (t,(p0,t0):ps) = (t,(p0,t0<>toks):ps)
57 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
58 closePair :: Pair -> Pairs -> Pairs
59 closePair p (t,[]) = dbg "closePair" $
60 (t<>tokens [TokenPlain $ snd $ pairBorders p $ tokens [TokenPlain ""]],[])
61 closePair p (t,(p1,t1):ts) = dbg "closePair" $
63 (PairElem x ax, PairElem y ay) | x == y ->
64 insertToken (TokenPair (PairElem x (ax<>ay)) t1) (t,ts)
65 (x,y) | x == y -> insertToken (TokenPair p1 t1) (t,ts)
69 (closeUnpaired mempty (p1,t1))
72 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
73 closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens
74 closeUnpaired acc (p,tn) = dbg "closeUnpaired" $
76 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
77 PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ tn <> acc ->
78 case Text.findIndex (not . isTagChar) t of
79 Just 0 -> tokens [TokenPlain $ fst $ pairBorders p mempty] <> tn <> acc
80 Just i -> Tokens $ TokenTag tag <| TokenPlain t' <| ts
81 where (tag,t') = Text.splitAt i t
82 Nothing -> Tokens $ TokenTag t <| ts
83 _ -> tokens [TokenPlain $ fst $ pairBorders p $ tokens [TokenPlain ""]] <> tn <> acc
88 case Char.generalCategory c of
89 Char.DashPunctuation -> True
90 Char.ConnectorPunctuation -> True
93 -- | Close remaining 'Pair's at end of parsing.
94 closePairs :: Pairs -> Tokens
95 closePairs (t0,ps) = dbg "closePairs" $
96 t0 <> foldl' closeUnpaired mempty ps
100 = LexemePairOpen !Pair
101 | LexemePairClose !Pair
102 | LexemePunctOrSym !Char
105 | LexemeToken !Tokens
110 appendLexeme :: Lexeme -> Pairs -> Pairs
111 appendLexeme lex ps =
113 case dbg "appendLexeme" lex of
114 LexemePairOpen p -> openPair p ps
115 LexemePairClose p -> closePair p ps
116 LexemePunctOrSym c -> insertToken (TokenPlain (Text.singleton c)) ps
117 LexemeWhite wh -> insertToken (TokenPlain wh) ps
118 LexemeWord wo -> insertToken (TokenPlain wo) ps
119 LexemeToken ts -> insertTokens ts ps
120 LexemeEscape c -> insertToken (TokenEscape c) ps
121 LexemeLink lnk -> insertToken (TokenLink lnk) ps
123 appendLexemes :: Pairs -> [Lexeme] -> Pairs
124 appendLexemes = foldl' (flip appendLexeme)
128 p_Tokens :: Parser e s Tokens
129 p_Tokens = closePairs <$> p_Pairs (mempty,[])
131 p_Pairs :: Pairs -> Parser e s Pairs
132 p_Pairs gs = pdbg "Pairs" $
133 (p_Lexemes (mempty == gs) >>= p_Pairs . appendLexemes gs) <|>
136 p_Lexemes :: Bool -> Parser e s [Lexeme]
137 p_Lexemes isBOF = pdbg "Lexemes" $
139 [ P.try $ p_PairCloseWhite
140 , P.try $ p_PairWhiteOpen isBOF
141 , P.try $ p_PairCloseBorder
142 , P.try $ p_PairBorderOpen
143 , P.try $ p_PairClose
145 , P.try $ pure . LexemePunctOrSym <$> p_PunctOrSym
146 , P.try $ pure <$> p_White
147 , pure . LexemeWord <$> p_Word
150 p_White :: Parser e s Lexeme
151 p_White = pdbg "White" $ LexemeWhite <$> p_Spaces
153 p_PunctOrSym :: Parser e s Char
156 Char.isPunctuation c ||
159 p_PairCloseWhite :: Parser e s [Lexeme]
160 p_PairCloseWhite = pdbg "PairCloseWhite" $
161 (\c b o -> mconcat c <> b <> mconcat o)
164 P.try p_ElemClose <|>
165 P.try p_PairClose <|>
166 pure . LexemePunctOrSym <$> p_PunctOrSym)
167 <*> (pure <$> p_White <|> P.eof $> [])
170 P.try p_ElemClose <|>
172 pure . LexemePunctOrSym <$> p_PunctOrSym)
174 p_PairWhiteOpen :: Bool -> Parser e s [Lexeme]
175 p_PairWhiteOpen isBOF = pdbg "PairWhiteOpen" $
176 (\b o -> b <> mconcat o)
177 <$> (if isBOF then return [] else pure <$> p_White)
180 P.try p_ElemClose <|>
182 pure . LexemePunctOrSym <$> p_PunctOrSym)
184 p_PairCloseBorder :: Parser e s [Lexeme]
185 p_PairCloseBorder = pdbg "PairCloseBorder" $
189 (\c b -> mconcat $ c <> b)
190 <$> P.some (P.try p_PairClose)
197 case l_PairOpen c <|> l_PairClose c of
199 Nothing -> fail "PairCloseBorder"
202 (\c b -> mconcat c <> [LexemePunctOrSym b])
203 <$> P.some (P.try p_PairClose)
206 p_PairBorderOpen :: Parser e s [Lexeme]
207 p_PairBorderOpen = pdbg "PairBorderOpen" $
211 (\b o -> mconcat $ b <> o)
218 case l_PairOpen c <|> l_PairClose c of
220 Nothing -> fail "PairBorderOpen"
222 <*> P.some (P.try p_PairOpen)
224 (\b o -> LexemePunctOrSym b : mconcat o)
226 <*> P.some (P.try p_PairOpen)
228 p_PairOpen :: Parser e s [Lexeme]
229 p_PairOpen = pdbg "PairOpen" $ do
232 , P.try (pure <$> p_Escape)
233 , P.try (pure <$> p_Link)
236 case l_PairOpenOrClose LexemePairOpen c of
241 p_PairClose :: Parser e s [Lexeme]
242 p_PairClose = pdbg "PairClose" $ do
246 , P.try (pure <$> p_Escape)
247 , P.try (pure <$> p_Link)
250 case l_PairOpenOrClose LexemePairClose c of
252 _ -> fail "PairClose"
255 l_PairOpenOrClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
256 l_PairOpenOrClose lxm c =
257 l_PairOpenAndClose lxm c <|>
261 l_PairOpenAndClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
262 l_PairOpenAndClose lxm c =
264 '/' -> Just $ lxm PairSlash
265 '-' -> Just $ lxm PairDash
266 '"' -> Just $ lxm PairDoublequote
267 '\'' -> Just $ lxm PairSinglequote
268 '`' -> Just $ lxm PairBackquote
269 '_' -> Just $ lxm PairUnderscore
270 '*' -> Just $ lxm PairStar
271 '#' -> Just $ lxm PairHash
274 l_PairOpen :: Char -> Maybe Lexeme
277 '(' -> Just $ LexemePairOpen PairParen
278 '[' -> Just $ LexemePairOpen PairBracket
279 '{' -> Just $ LexemePairOpen PairBrace
280 '«' -> Just $ LexemePairOpen PairFrenchquote
283 l_PairClose :: Char -> Maybe Lexeme
286 ')' -> Just $ LexemePairClose PairParen
287 ']' -> Just $ LexemePairClose PairBracket
288 '}' -> Just $ LexemePairClose PairBrace
289 '»' -> Just $ LexemePairClose PairFrenchquote
292 p_Link :: Parser e s Lexeme
294 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
295 <$> P.option "" (P.try p_scheme)
301 <$> P.some (P.satisfy $ \c ->
319 p_Escape :: Parser e s Lexeme
323 <*> P.satisfy Char.isPrint
325 p_ElemSingle :: Parser e s [Lexeme]
326 p_ElemSingle = pdbg "ElemSingle" $
328 [ LexemePairOpen $ PairElem e as
329 , LexemeToken $ mempty
330 , LexemePairClose $ PairElem e [] ])
336 p_ElemOpen :: Parser e s [Lexeme]
337 p_ElemOpen = pdbg "ElemOpen" $
340 True -> [ LexemePairOpen $ PairElem e as
341 , LexemeToken $ mempty
342 , LexemePairClose $ PairElem e [] ]
343 False -> [ LexemePairOpen $ PairElem e as
344 , LexemeToken $ tokens [TokenPlain ""]
349 <*> P.option False (True <$ P.char '/')
352 p_ElemClose :: Parser e s [Lexeme]
353 p_ElemClose = pdbg "ElemClose" $
354 (\e -> [LexemePairClose $ PairElem e []])