1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..), fromMaybe)
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Text.Show (Show(..))
23 import qualified Data.Char as Char
24 import qualified Data.Sequence as Seq
25 import qualified Data.Text as Text
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Builder as Builder
28 import qualified Text.Megaparsec as P
30 import Language.TCT.Token
31 import Language.TCT.Elem
32 import Language.TCT.Read.Elem -- hiding (pdbg)
34 -- pdbg m p = P.dbg m p
36 textOf :: Buildable a => a -> Text
37 textOf = TL.toStrict . Builder.toLazyText . build
40 type Pairs = (Tokens,[(Pair,Tokens)])
42 openPair :: Pair -> Pairs -> Pairs
43 openPair g (t,ms) = (t,(g,mempty):ms)
45 insertToken :: Token -> Pairs -> Pairs
46 insertToken tok (t,[]) = (t<>tokens [tok],[])
47 insertToken tok (t,(p0,t0):ps) = (t,(p0,t0<>tokens [tok]):ps)
49 insertTokens :: Tokens -> Pairs -> Pairs
50 insertTokens toks (t,[]) = (t<>toks,[])
51 insertTokens toks (t,(p0,t0):ps) = (t,(p0,t0<>toks):ps)
53 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
54 closePair :: Pair -> Pairs -> Pairs
55 closePair p (t,[]) = dbg "closePair" $ (t<>tokens [TokenPlain (snd $ pairBorders p mempty)],[])
56 closePair p (t,(p1,t1):ts) = dbg "closePair" $
58 (PairElem x ax, PairElem y ay) | x == y ->
59 insertToken (TokenPair (PairElem x (ax<>ay)) t1) (t,ts)
60 (x,y) | x == y -> insertToken (TokenPair p1 t1) (t,ts)
64 (closeUnpaired mempty (p1,t1))
67 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
68 closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens
69 closeUnpaired acc (p,tn) = dbg "closeUnpaired" $
71 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
72 PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ tn <> acc ->
73 case Text.findIndex (not . isTagChar) t of
74 Just 0 -> tokens [TokenPlain $ fst $ pairBorders p mempty] <> tn <> acc
75 Just i -> Tokens $ TokenTag tag <| TokenPlain t' <| ts
76 where (tag,t') = Text.splitAt i t
77 Nothing -> Tokens $ TokenTag t <| ts
78 _ -> tokens [TokenPlain $ fst $ pairBorders p mempty] <> tn <> acc
83 case Char.generalCategory c of
84 Char.DashPunctuation -> True
85 Char.ConnectorPunctuation -> True
88 -- | Close remaining 'Pair's at end of parsing.
89 closePairs :: Pairs -> Tokens
90 closePairs (t0,ps) = dbg "closePairs" $
91 t0 <> foldl' closeUnpaired mempty ps
96 | LexemePairClose Pair
97 | LexemePunctOrSym Char
105 appendLexeme :: Lexeme -> Pairs -> Pairs
106 appendLexeme lex ps =
108 case dbg "appendLexeme" lex of
109 LexemePairOpen p -> openPair p ps
110 LexemePairClose p -> closePair p ps
111 LexemePunctOrSym c -> insertToken (TokenPlain (Text.singleton c)) ps
112 LexemeWhite wh -> insertToken (TokenPlain wh) ps
113 LexemeWord wo -> insertToken (TokenPlain wo) ps
114 LexemeToken ts -> insertTokens ts ps
115 LexemeEscape c -> insertToken (TokenEscape c) ps
116 LexemeLink lnk -> insertToken (TokenLink lnk) ps
118 appendLexemes :: Pairs -> [Lexeme] -> Pairs
119 appendLexemes = foldl' (flip appendLexeme)
123 p_Tokens :: Parser e s Tokens
124 p_Tokens = closePairs <$> p_Pairs (mempty,[])
126 p_Pairs :: Pairs -> Parser e s Pairs
127 p_Pairs gs = pdbg "Pairs" $
128 (p_Lexemes (mempty == gs) >>= p_Pairs . appendLexemes gs) <|>
131 p_Lexemes :: Bool -> Parser e s [Lexeme]
132 p_Lexemes isBOF = pdbg "Lexemes" $
134 [ P.try $ p_PairCloseWhite
135 , P.try $ p_PairWhiteOpen isBOF
136 , P.try $ p_PairCloseBorder
137 , P.try $ p_PairBorderOpen
138 , P.try $ p_PairClose
140 , P.try $ pure . LexemePunctOrSym <$> p_PunctOrSym
141 , P.try $ pure <$> p_White
142 , pure . LexemeWord <$> p_Word
145 p_White :: Parser e s Lexeme
146 p_White = pdbg "White" $ LexemeWhite <$> p_Spaces
148 p_PunctOrSym :: Parser e s Char
149 p_PunctOrSym = P.satisfy $ \c ->
150 Char.isPunctuation c ||
153 p_PairCloseWhite :: Parser e s [Lexeme]
154 p_PairCloseWhite = pdbg "PairCloseWhite" $
155 (\c b -> mconcat c <> b)
156 <$> P.some (P.try p_PairClose <|> pure . LexemePunctOrSym <$> p_PunctOrSym)
157 <*> ((pure <$> p_White) <|> P.eof $> [])
159 p_PairWhiteOpen :: Bool -> Parser e s [Lexeme]
160 p_PairWhiteOpen isBOF = pdbg "PairWhiteOpen" $
161 (\b o -> b <> mconcat o)
162 <$> (if isBOF then return [] else pure <$> p_White)
163 <*> P.some (P.try p_PairOpen <|> pure . LexemePunctOrSym <$> p_PunctOrSym)
165 p_PairCloseBorder :: Parser e s [Lexeme]
166 p_PairCloseBorder = pdbg "PairCloseBorder" $
170 (\c b -> mconcat $ c <> b)
171 <$> P.some (P.try p_PairClose)
172 <*> P.some (P.try $ P.choice
177 case l_PairClose c of
180 case l_PairOpenAndClose LexemePairOpen c <|> l_PairOpen c of
181 Nothing -> return [LexemePunctOrSym c]
185 (\c b -> mconcat c <> [LexemePunctOrSym b])
186 <$> P.some (P.try p_PairClose)
189 p_PairBorderOpen :: Parser e s [Lexeme]
190 p_PairBorderOpen = pdbg "PairBorderOpen" $
194 (\b o -> mconcat $ b <> o)
195 <$> P.some (P.try $ P.choice
200 case l_PairOpen c <|> l_PairClose c of
204 <*> P.some (P.try p_PairOpen)
206 (\b o -> LexemePunctOrSym b : mconcat o)
208 <*> P.some (P.try p_PairOpen)
210 p_PairOpen :: Parser e s [Lexeme]
211 p_PairOpen = pdbg "PairOpen" $ do
214 , P.try (pure <$> p_Escape)
215 , P.try (pure <$> p_Link)
218 case l_PairOpenOrClose LexemePairOpen c of
223 p_PairClose :: Parser e s [Lexeme]
224 p_PairClose = pdbg "PairClose" $ do
228 , P.try (pure <$> p_Escape)
229 , P.try (pure <$> p_Link)
232 case l_PairOpenOrClose LexemePairClose c of
237 p_PairPlain :: (Char -> Maybe Lexeme) -> Parser e s [Lexeme]
238 p_PairPlain pair = pdbg "PairPlain" $ do
239 (<$> p_PunctOrSym) $ \c ->
241 LexemePunctOrSym c `fromMaybe`
244 l_PairOpenAndClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
245 l_PairOpenAndClose lxm c =
247 '/' -> Just $ lxm PairSlash
248 '-' -> Just $ lxm PairDash
249 '"' -> Just $ lxm PairDoublequote
250 '\'' -> Just $ lxm PairSinglequote
251 '`' -> Just $ lxm PairBackquote
252 '_' -> Just $ lxm PairUnderscore
253 '*' -> Just $ lxm PairStar
254 '#' -> Just $ lxm PairHash
257 l_PairOpenOrClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
258 l_PairOpenOrClose lxm c =
259 l_PairOpenAndClose lxm c <|>
263 l_PairOpen :: Char -> Maybe Lexeme
266 '(' -> Just $ LexemePairOpen PairParen
267 '[' -> Just $ LexemePairOpen PairBracket
268 '{' -> Just $ LexemePairOpen PairBrace
269 '«' -> Just $ LexemePairOpen PairFrenchquote
272 l_PairClose :: Char -> Maybe Lexeme
275 ')' -> Just $ LexemePairClose PairParen
276 ']' -> Just $ LexemePairClose PairBracket
277 '}' -> Just $ LexemePairClose PairBrace
278 '»' -> Just $ LexemePairClose PairFrenchquote
281 p_Link :: Parser e s Lexeme
283 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
284 <$> P.option "" (P.try p_scheme)
290 <$> P.some (P.satisfy $ \c ->
308 p_Escape :: Parser e s Lexeme
312 <*> P.satisfy Char.isPrint
314 p_ElemSingle :: Parser e s [Lexeme]
315 p_ElemSingle = pdbg "ElemOpen" $
317 [ LexemePairOpen $ PairElem e as
318 , LexemeToken $ Tokens mempty
319 -- NOTE: encode that it's the same Elem for open and close
320 , LexemePairClose $ PairElem e [] ])
326 p_ElemOpen :: Parser e s [Lexeme]
327 p_ElemOpen = pdbg "ElemOpen" $
330 True -> [ LexemePairOpen $ PairElem e as
331 , LexemeToken $ Tokens mempty
332 , LexemePairClose $ PairElem e [] ]
333 False -> [LexemePairOpen $ PairElem e as])
337 <*> P.option False (True <$ P.char '/')
340 p_ElemClose :: Parser e s [Lexeme]
341 p_ElemClose = pdbg "ElemClose" $
342 (\e -> [LexemePairClose $ PairElem e []])