]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Token.hs
Add basic DTC writing.
[doclang.git] / Language / TCT / Read / Token.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
6
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
9 import Data.Bool
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..), fromMaybe)
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Text.Show (Show(..))
23 import qualified Data.Char as Char
24 import qualified Data.Sequence as Seq
25 import qualified Data.Text as Text
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Builder as Builder
28 import qualified Text.Megaparsec as P
29
30 import Language.TCT.Token
31 import Language.TCT.Elem
32 import Language.TCT.Read.Elem -- hiding (pdbg)
33
34 -- pdbg m p = P.dbg m p
35
36 textOf :: Buildable a => a -> Text
37 textOf = TL.toStrict . Builder.toLazyText . build
38
39 -- * Type 'Groups'
40 type Groups = (Token,[(Group,Token)])
41
42 openGroup :: Group -> Groups -> Groups
43 openGroup g (t,ms) = (t,(g,mempty):ms)
44
45 insertToken :: Token -> Groups -> Groups
46 insertToken tok (t,[]) = (t<>tok,[])
47 insertToken tok (t,(g0,t0):gs) = (t,(g0,t0<>tok):gs)
48
49 -- | Close a 'Group' when there is a matching 'LexemeGroupClose'.
50 closeGroup :: Group -> Groups -> Groups
51 closeGroup g (t,[]) = dbg "closeGroup" $ (t<>TokenPlain (snd $ groupBorders g mempty),[])
52 closeGroup g (t,(g1,m1):ms) = dbg "closeGroup" $
53 case (g,g1) of
54 (GroupElem x ax, GroupElem y ay) | x == y ->
55 insertToken (TokenGroup (GroupElem x (ax<>ay)) m1) (t,ms)
56 (x,y) | x == y -> insertToken (TokenGroup g1 m1) (t,ms)
57 _ ->
58 closeGroup g $
59 insertToken
60 (closelessGroup mempty (g1,m1))
61 (t,ms)
62
63 -- | Close a 'Group' when there is not a matching 'LexemeGroupClose'.
64 closelessGroup :: Token -> (Group,Token) -> Token
65 closelessGroup acc (g,t) = dbg "closelessGroup" $
66 case g of
67 -- NOTE: try to close 'GroupHash' as 'TokenTag' instead of 'TokenPlain'.
68 GroupHash | TokenPlain p :< toks <- Seq.viewl $ unTokens $ t <> acc ->
69 case Text.findIndex (not . isTagChar) p of
70 Just 0 -> TokenPlain (fst $ groupBorders g mempty) <> t <> acc
71 Just i -> Tokens $ TokenTag tag <| TokenPlain p' <| toks
72 where (tag,p') = Text.splitAt i p
73 Nothing -> Tokens $ TokenTag p <| toks
74 _ -> TokenPlain (fst $ groupBorders g mempty) <> t <> acc
75 where
76 isTagChar c =
77 Char.isAlphaNum c ||
78 c=='·' ||
79 case Char.generalCategory c of
80 Char.DashPunctuation -> True
81 Char.ConnectorPunctuation -> True
82 _ -> False
83
84 -- | Close remaining 'Group's at end of parsing.
85 closeGroups :: Groups -> Token
86 closeGroups (t0,gs) = dbg "closeGroups" $
87 t0 <> foldl' closelessGroup mempty gs
88
89 -- * Type 'Lexeme'
90 data Lexeme
91 = LexemeGroupOpen Group
92 | LexemeGroupClose Group
93 | LexemePunctOrSym Char
94 | LexemeWhite Text
95 | LexemeWord Text
96 | LexemeToken Token
97 | LexemeEscape Char
98 | LexemeLink Text
99 deriving (Show, Eq)
100
101 appendLexeme :: Lexeme -> Groups -> Groups
102 appendLexeme lex gs =
103 dbg "appendLexeme" $
104 case dbg "appendLexeme" lex of
105 LexemeGroupOpen g -> openGroup g gs
106 LexemeGroupClose g -> closeGroup g gs
107 LexemePunctOrSym c -> insertToken (TokenPlain (Text.singleton c)) gs
108 LexemeWhite wh -> insertToken (TokenPlain wh) gs
109 LexemeWord wo -> insertToken (TokenPlain wo) gs
110 LexemeToken tok -> insertToken tok gs
111 LexemeEscape c -> insertToken (TokenEscape c) gs
112 LexemeLink lnk -> insertToken (TokenLink lnk) gs
113
114 appendLexemes :: Groups -> [Lexeme] -> Groups
115 appendLexemes = foldl' (flip appendLexeme)
116
117 -- * Parsers
118
119 p_Token :: Parser e s Token
120 p_Token = closeGroups <$> p_Groups (mempty,[])
121
122 p_Groups :: Groups -> Parser e s Groups
123 p_Groups gs = pdbg "Groups" $
124 (p_Lexemes (mempty == gs) >>= p_Groups . appendLexemes gs) <|>
125 (P.eof $> gs)
126
127 p_Lexemes :: Bool -> Parser e s [Lexeme]
128 p_Lexemes isBOF = pdbg "Lexemes" $
129 P.choice
130 [ P.try $ p_GroupCloseWhite
131 , P.try $ p_GroupWhiteOpen isBOF
132 , P.try $ p_GroupCloseBorder
133 , P.try $ p_GroupBorderOpen
134 , P.try $ p_GroupClose
135 , P.try $ p_GroupOpen
136 , P.try $ pure . LexemePunctOrSym <$> p_PunctOrSym
137 , P.try $ pure <$> p_White
138 , pure . LexemeWord <$> p_Word
139 ]
140
141 p_White :: Parser e s Lexeme
142 p_White = pdbg "White" $ LexemeWhite <$> p_Spaces
143
144 p_PunctOrSym :: Parser e s Char
145 p_PunctOrSym = P.satisfy $ \c ->
146 Char.isPunctuation c ||
147 Char.isSymbol c
148
149 p_GroupCloseWhite :: Parser e s [Lexeme]
150 p_GroupCloseWhite = pdbg "GroupCloseWhite" $
151 (\c b -> mconcat c <> b)
152 <$> P.some (P.try p_GroupClose <|> pure . LexemePunctOrSym <$> p_PunctOrSym)
153 <*> ((pure <$> p_White) <|> P.eof $> [])
154
155 p_GroupWhiteOpen :: Bool -> Parser e s [Lexeme]
156 p_GroupWhiteOpen isBOF = pdbg "GroupWhiteOpen" $
157 (\b o -> b <> mconcat o)
158 <$> (if isBOF then return [] else pure <$> p_White)
159 <*> P.some (P.try p_GroupOpen <|> pure . LexemePunctOrSym <$> p_PunctOrSym)
160
161 p_GroupCloseBorder :: Parser e s [Lexeme]
162 p_GroupCloseBorder = pdbg "GroupCloseBorder" $
163 (\c b -> mconcat $ c <> b)
164 <$> P.some (P.try p_GroupClose)
165 <*> P.some (P.try $ P.choice
166 [ P.try p_ElemOpen
167 , P.try p_ElemClose
168 , do
169 c <- p_PunctOrSym
170 case l_GroupClose c of
171 Just l -> return [l]
172 Nothing ->
173 case l_GroupOpenOrClose LexemeGroupOpen c <|> l_GroupOpen c of
174 Nothing -> return [LexemePunctOrSym c]
175 _ -> fail ""
176 ])
177
178 p_GroupBorderOpen :: Parser e s [Lexeme]
179 p_GroupBorderOpen = pdbg "GroupBorderOpen" $
180 (\b o -> mconcat $ b <> o)
181 <$> P.some (P.try $ P.choice
182 [ P.try p_ElemOpen
183 , P.try p_ElemClose
184 , do
185 c <- p_PunctOrSym
186 case l_GroupOpen c of
187 Just l -> return [l]
188 Nothing ->
189 case l_GroupOpenOrClose LexemeGroupClose c <|> l_GroupClose c of
190 Nothing -> return [LexemePunctOrSym c]
191 _ -> fail ""
192 ])
193 <*> P.some (P.try p_GroupOpen)
194
195 p_GroupOpen :: Parser e s [Lexeme]
196 p_GroupOpen = pdbg "GroupOpen" $ do
197 P.choice
198 [ P.try p_ElemOpen
199 , P.try (pure <$> p_Escape)
200 , P.try (pure <$> p_Link)
201 , do
202 c <- p_PunctOrSym
203 case l_GroupOpenAndClose LexemeGroupOpen c of
204 Just l -> return [l]
205 _ -> fail ""
206 ]
207
208 p_GroupClose :: Parser e s [Lexeme]
209 p_GroupClose = pdbg "GroupClose" $ do
210 P.choice
211 [ P.try p_ElemClose
212 , P.try p_ElemSingle
213 , P.try (pure <$> p_Escape)
214 , P.try (pure <$> p_Link)
215 , do
216 c <- p_PunctOrSym
217 case l_GroupOpenAndClose LexemeGroupClose c of
218 Just l -> return [l]
219 _ -> fail ""
220 ]
221
222 p_GroupPlain :: (Char -> Maybe Lexeme) -> Parser e s [Lexeme]
223 p_GroupPlain grp = pdbg "GroupPlain" $ do
224 (<$> p_PunctOrSym) $ \c ->
225 pure $
226 LexemePunctOrSym c `fromMaybe`
227 grp c
228
229 l_GroupOpenOrClose :: (Group -> Lexeme) -> Char -> Maybe Lexeme
230 l_GroupOpenOrClose lxm c =
231 case c of
232 '/' -> Just $ lxm GroupSlash
233 '-' -> Just $ lxm GroupDash
234 '"' -> Just $ lxm GroupDoublequote
235 '\'' -> Just $ lxm GroupSinglequote
236 '`' -> Just $ lxm GroupBackquote
237 '_' -> Just $ lxm GroupUnderscore
238 '*' -> Just $ lxm GroupStar
239 '#' -> Just $ lxm GroupHash
240 _ -> Nothing
241
242 l_GroupOpenAndClose :: (Group -> Lexeme) -> Char -> Maybe Lexeme
243 l_GroupOpenAndClose lxm c =
244 l_GroupOpenOrClose lxm c <|>
245 l_GroupOpen c <|>
246 l_GroupClose c
247
248 l_GroupOpen :: Char -> Maybe Lexeme
249 l_GroupOpen c =
250 case c of
251 '(' -> Just $ LexemeGroupOpen GroupParen
252 '[' -> Just $ LexemeGroupOpen GroupBracket
253 '{' -> Just $ LexemeGroupOpen GroupBrace
254 '«' -> Just $ LexemeGroupOpen GroupFrenchquote
255 _ -> Nothing
256
257 l_GroupClose :: Char -> Maybe Lexeme
258 l_GroupClose c =
259 case c of
260 ')' -> Just $ LexemeGroupClose GroupParen
261 ']' -> Just $ LexemeGroupClose GroupBracket
262 '}' -> Just $ LexemeGroupClose GroupBrace
263 '»' -> Just $ LexemeGroupClose GroupFrenchquote
264 _ -> Nothing
265
266 p_Link :: Parser e s Lexeme
267 p_Link =
268 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
269 <$> P.option "" (P.try p_scheme)
270 <*> P.string "//"
271 <*> p_addr
272 where
273 p_scheme =
274 (<>)
275 <$> P.some (P.satisfy $ \c ->
276 Char.isAlphaNum c
277 || c=='_'
278 || c=='-'
279 || c=='+')
280 <*> P.string ":"
281 p_addr =
282 P.many $
283 P.satisfy $ \c ->
284 Char.isAlphaNum c
285 || c=='%'
286 || c=='/'
287 || c=='('
288 || c==')'
289 || c=='-'
290 || c=='_'
291 || c=='.'
292
293 p_Escape :: Parser e s Lexeme
294 p_Escape =
295 LexemeEscape
296 <$ P.char '\\'
297 <*> P.satisfy Char.isPrint
298
299 p_ElemSingle :: Parser e s [Lexeme]
300 p_ElemSingle = pdbg "ElemOpen" $
301 (\e as ->
302 [ LexemeGroupOpen $ GroupElem e as
303 , LexemeToken $ Tokens mempty
304 -- NOTE: encode that it's the same Elem for open and close
305 , LexemeGroupClose $ GroupElem e [] ])
306 <$ P.char '<'
307 <*> p_Word
308 <*> p_Attrs
309 <* P.string "/>"
310
311 p_ElemOpen :: Parser e s [Lexeme]
312 p_ElemOpen = pdbg "ElemOpen" $
313 (\e as oc ->
314 case oc of
315 True -> [ LexemeGroupOpen $ GroupElem e as
316 , LexemeToken $ Tokens mempty
317 , LexemeGroupClose $ GroupElem e [] ]
318 False -> [LexemeGroupOpen $ GroupElem e as])
319 <$ P.char '<'
320 <*> p_Word
321 <*> p_Attrs
322 <*> P.option False (True <$ P.char '/')
323 <* P.char '>'
324
325 p_ElemClose :: Parser e s [Lexeme]
326 p_ElemClose = pdbg "ElemClose" $
327 (\e -> [LexemeGroupClose $ GroupElem e []])
328 <$ P.string "</"
329 <*> p_Word
330 <* P.char '>'