1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..), fromMaybe)
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Text.Show (Show(..))
23 import qualified Data.Char as Char
24 import qualified Data.Sequence as Seq
25 import qualified Data.Text as Text
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Builder as Builder
28 import qualified Text.Megaparsec as P
30 import Language.TCT.Token
31 import Language.TCT.Elem -- hiding (dbg)
32 import Language.TCT.Read.Elem -- hiding (pdbg)
35 import Debug.Trace (trace)
36 dbg m x = trace (m <> ": " <> show x) x
40 textOf :: Buildable a => a -> Text
41 textOf = TL.toStrict . Builder.toLazyText . build
44 type Pairs = (Tokens,[(Pair,Tokens)])
46 openPair :: Pair -> Pairs -> Pairs
47 openPair g (t,ms) = (t,(g,mempty):ms)
49 insertToken :: Token -> Pairs -> Pairs
50 insertToken tok (t,[]) = (t<>tokens [tok],[])
51 insertToken tok (t,(p0,t0):ps) = (t,(p0,t0<>tokens [tok]):ps)
53 insertTokens :: Tokens -> Pairs -> Pairs
54 insertTokens toks (t,[]) = (t<>toks,[])
55 insertTokens toks (t,(p0,t0):ps) = (t,(p0,t0<>toks):ps)
57 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
58 closePair :: Pair -> Pairs -> Pairs
59 closePair p (t,[]) = dbg "closePair" $ (t<>tokens [TokenPlain (snd $ pairBorders p mempty)],[])
60 closePair p (t,(p1,t1):ts) = dbg "closePair" $
62 (PairElem x ax, PairElem y ay) | x == y ->
63 insertToken (TokenPair (PairElem x (ax<>ay)) t1) (t,ts)
64 (x,y) | x == y -> insertToken (TokenPair p1 t1) (t,ts)
68 (closeUnpaired mempty (p1,t1))
71 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
72 closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens
73 closeUnpaired acc (p,tn) = dbg "closeUnpaired" $
75 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
76 PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ tn <> acc ->
77 case Text.findIndex (not . isTagChar) t of
78 Just 0 -> tokens [TokenPlain $ fst $ pairBorders p mempty] <> tn <> acc
79 Just i -> Tokens $ TokenTag tag <| TokenPlain t' <| ts
80 where (tag,t') = Text.splitAt i t
81 Nothing -> Tokens $ TokenTag t <| ts
82 _ -> tokens [TokenPlain $ fst $ pairBorders p $ tokens [TokenPlain ""]] <> tn <> acc
87 case Char.generalCategory c of
88 Char.DashPunctuation -> True
89 Char.ConnectorPunctuation -> True
92 -- | Close remaining 'Pair's at end of parsing.
93 closePairs :: Pairs -> Tokens
94 closePairs (t0,ps) = dbg "closePairs" $
95 t0 <> foldl' closeUnpaired mempty ps
100 | LexemePairClose Pair
101 | LexemePunctOrSym Char
109 appendLexeme :: Lexeme -> Pairs -> Pairs
110 appendLexeme lex ps =
112 case dbg "appendLexeme" lex of
113 LexemePairOpen p -> openPair p ps
114 LexemePairClose p -> closePair p ps
115 LexemePunctOrSym c -> insertToken (TokenPlain (Text.singleton c)) ps
116 LexemeWhite wh -> insertToken (TokenPlain wh) ps
117 LexemeWord wo -> insertToken (TokenPlain wo) ps
118 LexemeToken ts -> insertTokens ts ps
119 LexemeEscape c -> insertToken (TokenEscape c) ps
120 LexemeLink lnk -> insertToken (TokenLink lnk) ps
122 appendLexemes :: Pairs -> [Lexeme] -> Pairs
123 appendLexemes = foldl' (flip appendLexeme)
127 p_Tokens :: Parser e s Tokens
128 p_Tokens = closePairs <$> p_Pairs (mempty,[])
130 p_Pairs :: Pairs -> Parser e s Pairs
131 p_Pairs gs = pdbg "Pairs" $
132 (p_Lexemes (mempty == gs) >>= p_Pairs . appendLexemes gs) <|>
135 p_Lexemes :: Bool -> Parser e s [Lexeme]
136 p_Lexemes isBOF = pdbg "Lexemes" $
138 [ P.try $ p_PairCloseWhite
139 , P.try $ p_PairWhiteOpen isBOF
140 , P.try $ p_PairCloseBorder
141 , P.try $ p_PairBorderOpen
142 , P.try $ p_PairClose
144 , P.try $ pure . LexemePunctOrSym <$> p_PunctOrSym
145 , P.try $ pure <$> p_White
146 , pure . LexemeWord <$> p_Word
149 p_White :: Parser e s Lexeme
150 p_White = pdbg "White" $ LexemeWhite <$> p_Spaces
152 p_PunctOrSym :: Parser e s Char
153 p_PunctOrSym = P.satisfy $ \c ->
154 Char.isPunctuation c ||
157 p_PairCloseWhite :: Parser e s [Lexeme]
158 p_PairCloseWhite = pdbg "PairCloseWhite" $
159 (\c b -> mconcat c <> b)
162 P.try p_ElemClose <|>
163 P.try p_PairClose <|>
164 pure . LexemePunctOrSym <$> p_PunctOrSym
166 <*> ((pure <$> p_White) <|> P.eof $> [])
168 p_PairWhiteOpen :: Bool -> Parser e s [Lexeme]
169 p_PairWhiteOpen isBOF = pdbg "PairWhiteOpen" $
170 (\b o -> b <> mconcat o)
171 <$> (if isBOF then return [] else pure <$> p_White)
174 P.try p_ElemClose <|>
176 pure . LexemePunctOrSym <$> p_PunctOrSym
179 p_PairCloseBorder :: Parser e s [Lexeme]
180 p_PairCloseBorder = pdbg "PairCloseBorder" $
184 (\c b -> mconcat $ c <> b)
185 <$> P.some (P.try p_PairClose)
186 <*> P.some (P.try $ P.choice
191 case l_PairOpen c <|> l_PairClose c of
196 (\c b -> mconcat c <> [LexemePunctOrSym b])
197 <$> P.some (P.try p_PairClose)
200 p_PairBorderOpen :: Parser e s [Lexeme]
201 p_PairBorderOpen = pdbg "PairBorderOpen" $
205 (\b o -> mconcat $ b <> o)
206 <$> P.some (P.try $ P.choice
211 case l_PairOpen c <|> l_PairClose c of
215 <*> P.some (P.try p_PairOpen)
217 (\b o -> LexemePunctOrSym b : mconcat o)
219 <*> P.some (P.try p_PairOpen)
221 p_PairOpen :: Parser e s [Lexeme]
222 p_PairOpen = pdbg "PairOpen" $ do
225 , P.try (pure <$> p_Escape)
226 , P.try (pure <$> p_Link)
229 case l_PairOpenOrClose LexemePairOpen c of
234 p_PairClose :: Parser e s [Lexeme]
235 p_PairClose = pdbg "PairClose" $ do
239 , P.try (pure <$> p_Escape)
240 , P.try (pure <$> p_Link)
243 case l_PairOpenOrClose LexemePairClose c of
248 p_PairPlain :: (Char -> Maybe Lexeme) -> Parser e s [Lexeme]
249 p_PairPlain pair = pdbg "PairPlain" $ do
250 (<$> p_PunctOrSym) $ \c ->
252 LexemePunctOrSym c `fromMaybe`
255 l_PairOpenAndClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
256 l_PairOpenAndClose lxm c =
258 '/' -> Just $ lxm PairSlash
259 '-' -> Just $ lxm PairDash
260 '"' -> Just $ lxm PairDoublequote
261 '\'' -> Just $ lxm PairSinglequote
262 '`' -> Just $ lxm PairBackquote
263 '_' -> Just $ lxm PairUnderscore
264 '*' -> Just $ lxm PairStar
265 '#' -> Just $ lxm PairHash
268 l_PairOpenOrClose :: (Pair -> Lexeme) -> Char -> Maybe Lexeme
269 l_PairOpenOrClose lxm c =
270 l_PairOpenAndClose lxm c <|>
274 l_PairOpen :: Char -> Maybe Lexeme
277 '(' -> Just $ LexemePairOpen PairParen
278 '[' -> Just $ LexemePairOpen PairBracket
279 '{' -> Just $ LexemePairOpen PairBrace
280 '«' -> Just $ LexemePairOpen PairFrenchquote
283 l_PairClose :: Char -> Maybe Lexeme
286 ')' -> Just $ LexemePairClose PairParen
287 ']' -> Just $ LexemePairClose PairBracket
288 '}' -> Just $ LexemePairClose PairBrace
289 '»' -> Just $ LexemePairClose PairFrenchquote
292 p_Link :: Parser e s Lexeme
294 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
295 <$> P.option "" (P.try p_scheme)
301 <$> P.some (P.satisfy $ \c ->
319 p_Escape :: Parser e s Lexeme
323 <*> P.satisfy Char.isPrint
325 p_ElemSingle :: Parser e s [Lexeme]
326 p_ElemSingle = pdbg "ElemSingle" $
328 [ LexemePairOpen $ PairElem e as
329 , LexemeToken $ mempty
330 , LexemePairClose $ PairElem e [] ])
336 p_ElemOpen :: Parser e s [Lexeme]
337 p_ElemOpen = pdbg "ElemOpen" $
340 True -> [ LexemePairOpen $ PairElem e as
341 , LexemeToken $ mempty
342 , LexemePairClose $ PairElem e [] ]
343 False -> [ LexemePairOpen $ PairElem e as
344 , LexemeToken $ tokens [TokenPlain ""]
349 <*> P.option False (True <$ P.char '/')
352 p_ElemClose :: Parser e s [Lexeme]
353 p_ElemClose = pdbg "ElemClose" $
354 (\e -> [LexemePairClose $ PairElem e []])