]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Token.hs
Fix Token parsing.
[doclang.git] / Language / TCT / Read / Token.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
6
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
9 import Data.Bool
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..), fromMaybe)
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Text (Text)
19 import Data.Text.Buildable (Buildable(..))
20 import Data.Tuple (fst,snd)
21 import Text.Show (Show(..))
22 import qualified Data.Char as Char
23 import qualified Data.Text as Text
24 import qualified Data.Text.Lazy as TL
25 import qualified Data.Text.Lazy.Builder as Builder
26 import qualified Text.Megaparsec as P
27
28 import Language.TCT.Token
29 import Language.TCT.Elem
30 import Language.TCT.Read.Elem
31
32 textOf :: Buildable a => a -> Text
33 textOf = TL.toStrict . Builder.toLazyText . build
34
35 -- * Type 'Groups'
36 type Groups = (Token,[(Group,Token)])
37
38 openGroup :: Group -> Groups -> Groups
39 openGroup g (t,ms) = (t,(g,mempty):ms)
40
41 groupToken :: Token -> Groups -> Groups
42 groupToken mrk (t,[]) = (t<>mrk,[])
43 groupToken mrk (t,(g0,m0):gs) = (t,(g0,m0<>mrk):gs)
44
45 closeGroup :: Group -> Groups -> Groups
46 closeGroup g (t,[]) = (t<>TokenPlain (snd $ groupBorders g mempty),[])
47 closeGroup g (t,(g1,m1):ms) =
48 case (g,g1) of
49 (GroupElem x ax, GroupElem y ay) | x == y ->
50 groupToken (TokenGroup (GroupElem x (ax<>ay)) m1) (t,ms)
51 (x,y) | x == y -> groupToken (TokenGroup g1 m1) (t,ms)
52 _ ->
53 closeGroup g $
54 groupToken
55 (TokenPlain (fst $ groupBorders g1 mempty) <> m1)
56 (t,ms)
57
58 closeGroups :: Groups -> Token
59 closeGroups grps =
60 let (m0,gs) = appendLexeme (LexemeWhite "") grps in
61 foldr (\(g,t) acc ->
62 acc <> TokenPlain (fst $ groupBorders g mempty) <> t) m0 gs
63
64 -- * Type 'Lexeme'
65 data Lexeme
66 = LexemeGroupOpen Group
67 | LexemeGroupClose Group
68 | LexemeGroupPlain Char
69 | LexemeWhite Text
70 | LexemeWord Text
71 | LexemeToken Token
72 | LexemeEscape Char
73 | LexemeLink Text
74 deriving (Show, Eq)
75
76 appendLexeme :: Lexeme -> Groups -> Groups
77 appendLexeme lex gs =
78 case dbg "appendLexeme" lex of
79 _ | (tok,(GroupHash,tag):gs') <- gs
80 , (case lex of
81 LexemeWord{} -> False
82 LexemeEscape{} -> False
83 LexemeGroupClose GroupHash -> False
84 _ -> True) ->
85 appendLexeme lex $
86 groupToken (TokenTag (textOf tag)) (tok,gs')
87 LexemeGroupOpen g -> openGroup g gs
88 LexemeGroupClose g -> closeGroup g gs
89 LexemeGroupPlain c -> groupToken (TokenPlain (Text.singleton c)) gs
90 LexemeWhite wh -> groupToken (TokenPlain wh) gs
91 LexemeWord wo -> groupToken (TokenPlain wo) gs
92 LexemeToken tok -> groupToken tok gs
93 LexemeEscape c -> groupToken (TokenEscape c) gs
94 LexemeLink lnk -> groupToken (TokenLink lnk) gs
95
96 appendLexemes :: Groups -> [Lexeme] -> Groups
97 appendLexemes = foldl' (flip appendLexeme)
98
99 -- * Parsers
100
101 p_Token :: Parser e s Token
102 p_Token = closeGroups <$> p_Groups (mempty,[])
103
104 p_Groups :: Groups -> Parser e s Groups
105 p_Groups gs = pdbg "Groups" $
106 (<|>)
107 (p_Lexemes (mempty == gs) >>= p_Groups . appendLexemes gs)
108 (P.eof $> gs)
109
110 p_Lexemes :: Bool -> Parser e s [Lexeme]
111 p_Lexemes isBOF = pdbg "Lexemes" $
112 P.choice
113 [ P.try $ p_GroupClose
114 , P.try $ p_GroupOpen isBOF
115 , P.try $ p_GroupOpenOrClose l_GroupOpenOrClose
116 , P.try $ pure <$> p_White
117 , pure . LexemeWord <$> p_Word
118 ]
119
120 p_White :: Parser e s Lexeme
121 p_White = pdbg "White" $
122 LexemeWhite <$> p_Spaces
123
124 p_PunctOrSym :: Parser e s Char
125 p_PunctOrSym = P.satisfy $ \c -> Char.isPunctuation c || Char.isSymbol c
126
127 p_GroupOpen :: Bool -> Parser e s [Lexeme]
128 p_GroupOpen isBOF = pdbg "GroupOpen" $ do
129 wh <- if isBOF then return [] else pure <$> p_White
130 ps <- mconcat <$> P.some (P.try $ p_GroupOpenOrClose $ l_GroupOpenAndClose LexemeGroupOpen)
131 return $ wh<>ps
132
133 p_GroupClose :: Parser e s [Lexeme]
134 p_GroupClose = pdbg "GroupClose" $ do
135 ps <- mconcat <$> P.some (P.try $ p_GroupOpenOrClose $ l_GroupOpenAndClose LexemeGroupClose)
136 wh <- pure <$> p_White <|> P.eof $> []
137 return $ ps<>wh
138
139 p_GroupOpenOrClose :: (Char -> Maybe Lexeme) -> Parser e s [Lexeme]
140 p_GroupOpenOrClose grp = pdbg "GroupOpenOrClose" $ do
141 P.choice
142 [ P.try p_Elem
143 , P.try (pure <$> p_Escape)
144 , P.try (pure <$> p_Link)
145 , (<$> p_PunctOrSym) $ \c ->
146 pure $
147 LexemeGroupPlain c `fromMaybe`
148 grp c
149 ]
150
151 l_GroupOpenAndClose :: (Group -> Lexeme) -> Char -> Maybe Lexeme
152 l_GroupOpenAndClose lxm c = dbg "GroupOpenAndClose" $
153 case c of
154 '/' -> Just $ lxm GroupSlash
155 '-' -> Just $ lxm GroupDash
156 '"' -> Just $ lxm GroupDoublequote
157 '\'' -> Just $ lxm GroupSinglequote
158 '`' -> Just $ lxm GroupBackquote
159 '_' -> Just $ lxm GroupUnderscore
160 '*' -> Just $ lxm GroupStar
161 '#' -> Just $ lxm GroupHash
162 _ -> l_GroupOpenOrClose c
163
164 l_GroupOpenOrClose :: Char -> Maybe Lexeme
165 l_GroupOpenOrClose c = dbg "GroupOpenOrClose" $
166 case c of
167 '(' -> Just $ LexemeGroupOpen GroupParen
168 '[' -> Just $ LexemeGroupOpen GroupBracket
169 '{' -> Just $ LexemeGroupOpen GroupBrace
170 '«' -> Just $ LexemeGroupOpen GroupFrenchquote
171 ')' -> Just $ LexemeGroupClose GroupParen
172 ']' -> Just $ LexemeGroupClose GroupBracket
173 '}' -> Just $ LexemeGroupClose GroupBrace
174 '»' -> Just $ LexemeGroupClose GroupFrenchquote
175 _ -> Nothing
176
177 p_Link :: Parser e s Lexeme
178 p_Link =
179 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
180 <$> P.option "" (P.try p_scheme)
181 <*> P.string "//"
182 <*> p_addr
183 where
184 p_scheme =
185 (<>)
186 <$> P.some (P.satisfy $ \c ->
187 Char.isAlphaNum c
188 || c=='_'
189 || c=='-'
190 || c=='+')
191 <*> P.string ":"
192 p_addr =
193 P.many $
194 P.satisfy $ \c ->
195 Char.isAlphaNum c
196 || c=='%'
197 || c=='/'
198 || c=='('
199 || c==')'
200 || c=='-'
201 || c=='_'
202 || c=='.'
203
204 p_Escape :: Parser e s Lexeme
205 p_Escape =
206 LexemeEscape
207 <$ P.char '\\'
208 <*> P.satisfy Char.isPrint
209
210 p_Elem :: Parser e s [Lexeme]
211 p_Elem = pdbg "Elem" $ P.char '<' >> (p_close <|> p_open)
212 where
213 p_open =
214 (\e as oc ->
215 case oc of
216 True -> [ LexemeGroupOpen $ GroupElem e as
217 , LexemeToken $ Tokens mempty -- same elem for open and close
218 , LexemeGroupClose $ GroupElem e [] ]
219 False -> [LexemeGroupOpen $ GroupElem e as])
220 <$> p_Word
221 <*> p_Attrs
222 <*> P.option False (True <$ P.char '/')
223 <* P.char '>'
224 p_close =
225 (\e -> [LexemeGroupClose $ GroupElem e []])
226 <$ P.char '/'
227 <*> p_Word
228 <* P.char '>'