1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..), fromMaybe)
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Text.Show (Show(..))
23 import qualified Data.Char as Char
24 import qualified Data.Sequence as Seq
25 import qualified Data.Text as Text
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Builder as Builder
28 import qualified Text.Megaparsec as P
30 import Language.TCT.Token
31 import Language.TCT.Elem
32 import Language.TCT.Read.Elem
34 textOf :: Buildable a => a -> Text
35 textOf = TL.toStrict . Builder.toLazyText . build
38 type Groups = (Token,[(Group,Token)])
40 openGroup :: Group -> Groups -> Groups
41 openGroup g (t,ms) = (t,(g,mempty):ms)
43 insertToken :: Token -> Groups -> Groups
44 insertToken tok (t,[]) = (t<>tok,[])
45 insertToken tok (t,(g0,t0):gs) = (t,(g0,t0<>tok):gs)
47 closeGroup :: Group -> Groups -> Groups
48 closeGroup g (t,[]) = dbg "closeGroup" $ (t<>TokenPlain (snd $ groupBorders g mempty),[])
49 closeGroup g (t,(g1,m1):ms) = dbg "closeGroup" $
51 (GroupElem x ax, GroupElem y ay) | x == y ->
52 insertToken (TokenGroup (GroupElem x (ax<>ay)) m1) (t,ms)
53 (x,y) | x == y -> insertToken (TokenGroup g1 m1) (t,ms)
57 (TokenPlain (fst $ groupBorders g1 mempty) <> m1)
60 closeGroups :: Groups -> Token
61 closeGroups (t0,gs) = dbg "closeGroups" $
65 -- NOTE: try to close 'GroupHash' as 'TokenTag' instead of 'TokenPlain'.
66 GroupHash | TokenPlain p:<toks <- Seq.viewl $ unTokens $ t <> acc ->
67 case Text.findIndex (not . isTagChar) p of
68 Just 0 -> TokenPlain (fst $ groupBorders g mempty) <> t <> acc
70 let (tag,p') = Text.splitAt i p in
71 Tokens $ TokenTag tag<|TokenPlain p'<|toks
73 Tokens $ TokenTag p<|toks
74 _ -> TokenPlain (fst $ groupBorders g mempty) <> t <> acc
79 case Char.generalCategory c of
80 Char.DashPunctuation -> True
81 Char.ConnectorPunctuation -> True
86 = LexemeGroupOpen Group
87 | LexemeGroupClose Group
88 | LexemeGroupPlain Char
96 appendLexeme :: Lexeme -> Groups -> Groups
99 case dbg "appendLexeme:" lex of
100 LexemeGroupOpen g -> openGroup g gs
101 LexemeGroupClose g -> closeGroup g gs
102 LexemeGroupPlain c -> insertToken (TokenPlain (Text.singleton c)) gs
103 LexemeWhite wh -> insertToken (TokenPlain wh) gs
104 LexemeWord wo -> insertToken (TokenPlain wo) gs
105 LexemeToken tok -> insertToken tok gs
106 LexemeEscape c -> insertToken (TokenEscape c) gs
107 LexemeLink lnk -> insertToken (TokenLink lnk) gs
109 appendLexemes :: Groups -> [Lexeme] -> Groups
110 appendLexemes = foldl' (flip appendLexeme)
114 p_Token :: Parser e s Token
115 p_Token = closeGroups <$> p_Groups (mempty,[])
117 p_Groups :: Groups -> Parser e s Groups
118 p_Groups gs = pdbg "Groups" $
120 (p_Lexemes (mempty == gs) >>= p_Groups . appendLexemes gs)
123 p_Lexemes :: Bool -> Parser e s [Lexeme]
124 p_Lexemes isBOF = pdbg "Lexemes" $
126 [ P.try $ p_GroupClose
127 , P.try $ p_GroupOpen isBOF
128 , P.try $ p_GroupOpenOrClose l_GroupOpenOrClose
129 , P.try $ pure <$> p_White
130 , pure . LexemeWord <$> p_Word
133 p_White :: Parser e s Lexeme
134 p_White = pdbg "White" $
135 LexemeWhite <$> p_Spaces
137 p_PunctOrSym :: Parser e s Char
138 p_PunctOrSym = P.satisfy $ \c -> Char.isPunctuation c || Char.isSymbol c
140 p_GroupOpen :: Bool -> Parser e s [Lexeme]
141 p_GroupOpen isBOF = pdbg "GroupOpen" $ do
142 wh <- if isBOF then return [] else pure <$> p_White
143 ps <- mconcat <$> P.some (P.try $ p_GroupOpenOrClose $ l_GroupOpenAndClose LexemeGroupOpen)
146 p_GroupClose :: Parser e s [Lexeme]
147 p_GroupClose = pdbg "GroupClose" $ do
148 ps <- mconcat <$> P.some (P.try $ p_GroupOpenOrClose $ l_GroupOpenAndClose LexemeGroupClose)
149 wh <- pure <$> p_White <|> P.eof $> []
152 p_GroupOpenOrClose :: (Char -> Maybe Lexeme) -> Parser e s [Lexeme]
153 p_GroupOpenOrClose grp = pdbg "GroupOpenOrClose" $ do
156 , P.try (pure <$> p_Escape)
157 , P.try (pure <$> p_Link)
158 , (<$> p_PunctOrSym) $ \c ->
160 LexemeGroupPlain c `fromMaybe`
164 l_GroupOpenAndClose :: (Group -> Lexeme) -> Char -> Maybe Lexeme
165 l_GroupOpenAndClose lxm c = dbg "GroupOpenAndClose" $
167 '/' -> Just $ lxm GroupSlash
168 '-' -> Just $ lxm GroupDash
169 '"' -> Just $ lxm GroupDoublequote
170 '\'' -> Just $ lxm GroupSinglequote
171 '`' -> Just $ lxm GroupBackquote
172 '_' -> Just $ lxm GroupUnderscore
173 '*' -> Just $ lxm GroupStar
174 '#' -> Just $ lxm GroupHash
175 _ -> l_GroupOpenOrClose c
177 l_GroupOpenOrClose :: Char -> Maybe Lexeme
178 l_GroupOpenOrClose c = dbg "GroupOpenOrClose" $
180 '(' -> Just $ LexemeGroupOpen GroupParen
181 '[' -> Just $ LexemeGroupOpen GroupBracket
182 '{' -> Just $ LexemeGroupOpen GroupBrace
183 '«' -> Just $ LexemeGroupOpen GroupFrenchquote
184 ')' -> Just $ LexemeGroupClose GroupParen
185 ']' -> Just $ LexemeGroupClose GroupBracket
186 '}' -> Just $ LexemeGroupClose GroupBrace
187 '»' -> Just $ LexemeGroupClose GroupFrenchquote
188 '#' -> Just $ LexemeGroupOpen GroupHash
191 p_Link :: Parser e s Lexeme
193 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
194 <$> P.option "" (P.try p_scheme)
200 <$> P.some (P.satisfy $ \c ->
218 p_Escape :: Parser e s Lexeme
222 <*> P.satisfy Char.isPrint
224 p_Elem :: Parser e s [Lexeme]
225 p_Elem = pdbg "Elem" $ P.char '<' >> (p_close <|> p_open)
230 True -> [ LexemeGroupOpen $ GroupElem e as
231 , LexemeToken $ Tokens mempty -- same elem for open and close
232 , LexemeGroupClose $ GroupElem e [] ]
233 False -> [LexemeGroupOpen $ GroupElem e as])
236 <*> P.option False (True <$ P.char '/')
239 (\e -> [LexemeGroupClose $ GroupElem e []])