1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..), fromMaybe)
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Text.Show (Show(..))
23 import qualified Data.Char as Char
24 import qualified Data.Sequence as Seq
25 import qualified Data.Text as Text
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Builder as Builder
28 import qualified Text.Megaparsec as P
30 import Language.TCT.Token
31 import Language.TCT.Elem
32 import Language.TCT.Read.Elem -- hiding (pdbg)
34 -- pdbg m p = P.dbg m p
36 textOf :: Buildable a => a -> Text
37 textOf = TL.toStrict . Builder.toLazyText . build
40 type Groups = (Token,[(Group,Token)])
42 openGroup :: Group -> Groups -> Groups
43 openGroup g (t,ms) = (t,(g,mempty):ms)
45 insertToken :: Token -> Groups -> Groups
46 insertToken tok (t,[]) = (t<>tok,[])
47 insertToken tok (t,(g0,t0):gs) = (t,(g0,t0<>tok):gs)
49 -- | Close a 'Group' when there is a matching 'LexemeGroupClose'.
50 closeGroup :: Group -> Groups -> Groups
51 closeGroup g (t,[]) = dbg "closeGroup" $ (t<>TokenPlain (snd $ groupBorders g mempty),[])
52 closeGroup g (t,(g1,m1):ms) = dbg "closeGroup" $
54 (GroupElem x ax, GroupElem y ay) | x == y ->
55 insertToken (TokenGroup (GroupElem x (ax<>ay)) m1) (t,ms)
56 (x,y) | x == y -> insertToken (TokenGroup g1 m1) (t,ms)
60 (closelessGroup mempty (g1,m1))
63 -- | Close a 'Group' when there is not a matching 'LexemeGroupClose'.
64 closelessGroup :: Token -> (Group,Token) -> Token
65 closelessGroup acc (g,t) = dbg "closelessGroup" $
67 -- NOTE: try to close 'GroupHash' as 'TokenTag' instead of 'TokenPlain'.
68 GroupHash | TokenPlain p :< toks <- Seq.viewl $ unTokens $ t <> acc ->
69 case Text.findIndex (not . isTagChar) p of
70 Just 0 -> TokenPlain (fst $ groupBorders g mempty) <> t <> acc
71 Just i -> Tokens $ TokenTag tag <| TokenPlain p' <| toks
72 where (tag,p') = Text.splitAt i p
73 Nothing -> Tokens $ TokenTag p <| toks
74 _ -> TokenPlain (fst $ groupBorders g mempty) <> t <> acc
78 case Char.generalCategory c of
79 Char.DashPunctuation -> True
80 Char.ConnectorPunctuation -> True
83 -- | Close remaining 'Group's at end of parsing.
84 closeGroups :: Groups -> Token
85 closeGroups (t0,gs) = dbg "closeGroups" $
86 t0 <> foldl' closelessGroup mempty gs
90 = LexemeGroupOpen Group
91 | LexemeGroupClose Group
92 | LexemePunctOrSym Char
100 appendLexeme :: Lexeme -> Groups -> Groups
101 appendLexeme lex gs =
103 case dbg "appendLexeme" lex of
104 LexemeGroupOpen g -> openGroup g gs
105 LexemeGroupClose g -> closeGroup g gs
106 LexemePunctOrSym c -> insertToken (TokenPlain (Text.singleton c)) gs
107 LexemeWhite wh -> insertToken (TokenPlain wh) gs
108 LexemeWord wo -> insertToken (TokenPlain wo) gs
109 LexemeToken tok -> insertToken tok gs
110 LexemeEscape c -> insertToken (TokenEscape c) gs
111 LexemeLink lnk -> insertToken (TokenLink lnk) gs
113 appendLexemes :: Groups -> [Lexeme] -> Groups
114 appendLexemes = foldl' (flip appendLexeme)
118 p_Token :: Parser e s Token
119 p_Token = closeGroups <$> p_Groups (mempty,[])
121 p_Groups :: Groups -> Parser e s Groups
122 p_Groups gs = pdbg "Groups" $
123 (p_Lexemes (mempty == gs) >>= p_Groups . appendLexemes gs) <|>
126 p_Lexemes :: Bool -> Parser e s [Lexeme]
127 p_Lexemes isBOF = pdbg "Lexemes" $
129 [ P.try $ p_GroupCloseWhite
130 , P.try $ p_GroupWhiteOpen isBOF
131 , P.try $ p_GroupCloseBorder
132 , P.try $ p_GroupBorderOpen
133 , P.try $ p_GroupClose
134 , P.try $ p_GroupOpen
135 , P.try $ pure . LexemePunctOrSym <$> p_PunctOrSym
136 , P.try $ pure <$> p_White
137 , pure . LexemeWord <$> p_Word
140 p_White :: Parser e s Lexeme
141 p_White = pdbg "White" $ LexemeWhite <$> p_Spaces
143 p_PunctOrSym :: Parser e s Char
144 p_PunctOrSym = P.satisfy $ \c ->
145 Char.isPunctuation c ||
148 p_GroupCloseWhite :: Parser e s [Lexeme]
149 p_GroupCloseWhite = pdbg "GroupCloseWhite" $
150 (\c b -> mconcat c <> b)
151 <$> P.some (P.try p_GroupClose <|> pure . LexemePunctOrSym <$> p_PunctOrSym)
152 <*> ((pure <$> p_White) <|> P.eof $> [])
154 p_GroupWhiteOpen :: Bool -> Parser e s [Lexeme]
155 p_GroupWhiteOpen isBOF = pdbg "GroupWhiteOpen" $
156 (\b o -> b <> mconcat o)
157 <$> (if isBOF then return [] else pure <$> p_White)
158 <*> P.some (P.try p_GroupOpen <|> pure . LexemePunctOrSym <$> p_PunctOrSym)
160 p_GroupCloseBorder :: Parser e s [Lexeme]
161 p_GroupCloseBorder = pdbg "GroupCloseBorder" $
162 (\c b -> mconcat $ c <> b)
163 <$> P.some (P.try p_GroupClose)
164 <*> P.some (P.try $ P.choice
169 case l_GroupClose c of
172 case l_GroupOpenOrClose LexemeGroupOpen c <|> l_GroupOpen c of
173 Nothing -> return [LexemePunctOrSym c]
177 p_GroupBorderOpen :: Parser e s [Lexeme]
178 p_GroupBorderOpen = pdbg "GroupBorderOpen" $
179 (\b o -> mconcat $ b <> o)
180 <$> P.some (P.try $ P.choice
185 case l_GroupOpen c of
188 case l_GroupOpenOrClose LexemeGroupClose c <|> l_GroupClose c of
189 Nothing -> return [LexemePunctOrSym c]
192 <*> P.some (P.try p_GroupOpen)
194 p_GroupOpen :: Parser e s [Lexeme]
195 p_GroupOpen = pdbg "GroupOpen" $ do
198 , P.try (pure <$> p_Escape)
199 , P.try (pure <$> p_Link)
202 case l_GroupOpenAndClose LexemeGroupOpen c of
207 p_GroupClose :: Parser e s [Lexeme]
208 p_GroupClose = pdbg "GroupClose" $ do
212 , P.try (pure <$> p_Escape)
213 , P.try (pure <$> p_Link)
216 case l_GroupOpenAndClose LexemeGroupClose c of
221 p_GroupPlain :: (Char -> Maybe Lexeme) -> Parser e s [Lexeme]
222 p_GroupPlain grp = pdbg "GroupPlain" $ do
223 (<$> p_PunctOrSym) $ \c ->
225 LexemePunctOrSym c `fromMaybe`
228 l_GroupOpenOrClose :: (Group -> Lexeme) -> Char -> Maybe Lexeme
229 l_GroupOpenOrClose lxm c =
231 '/' -> Just $ lxm GroupSlash
232 '-' -> Just $ lxm GroupDash
233 '"' -> Just $ lxm GroupDoublequote
234 '\'' -> Just $ lxm GroupSinglequote
235 '`' -> Just $ lxm GroupBackquote
236 '_' -> Just $ lxm GroupUnderscore
237 '*' -> Just $ lxm GroupStar
238 '#' -> Just $ lxm GroupHash
241 l_GroupOpenAndClose :: (Group -> Lexeme) -> Char -> Maybe Lexeme
242 l_GroupOpenAndClose lxm c =
243 l_GroupOpenOrClose lxm c <|>
247 l_GroupOpen :: Char -> Maybe Lexeme
250 '(' -> Just $ LexemeGroupOpen GroupParen
251 '[' -> Just $ LexemeGroupOpen GroupBracket
252 '{' -> Just $ LexemeGroupOpen GroupBrace
253 '«' -> Just $ LexemeGroupOpen GroupFrenchquote
256 l_GroupClose :: Char -> Maybe Lexeme
259 ')' -> Just $ LexemeGroupClose GroupParen
260 ']' -> Just $ LexemeGroupClose GroupBracket
261 '}' -> Just $ LexemeGroupClose GroupBrace
262 '»' -> Just $ LexemeGroupClose GroupFrenchquote
265 p_Link :: Parser e s Lexeme
267 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
268 <$> P.option "" (P.try p_scheme)
274 <$> P.some (P.satisfy $ \c ->
292 p_Escape :: Parser e s Lexeme
296 <*> P.satisfy Char.isPrint
298 p_ElemSingle :: Parser e s [Lexeme]
299 p_ElemSingle = pdbg "ElemOpen" $
301 [ LexemeGroupOpen $ GroupElem e as
302 , LexemeToken $ Tokens mempty
303 -- NOTE: encode that it's the same Elem for open and close
304 , LexemeGroupClose $ GroupElem e [] ])
310 p_ElemOpen :: Parser e s [Lexeme]
311 p_ElemOpen = pdbg "ElemOpen" $
314 True -> [ LexemeGroupOpen $ GroupElem e as
315 , LexemeToken $ Tokens mempty
316 , LexemeGroupClose $ GroupElem e [] ]
317 False -> [LexemeGroupOpen $ GroupElem e as])
321 <*> P.option False (True <$ P.char '/')
324 p_ElemClose :: Parser e s [Lexeme]
325 p_ElemClose = pdbg "ElemClose" $
326 (\e -> [LexemeGroupClose $ GroupElem e []])