]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Token.hs
Add Cell Text parsing.
[doclang.git] / Language / TCT / Read / Token.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
6
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
9 import Data.Bool
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (foldr, foldl')
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Monoid (Monoid(..))
16 import Data.Semigroup (Semigroup(..))
17 import Data.Text (Text)
18 import Data.Text.Buildable (Buildable(..))
19 import Data.Tuple (fst,snd)
20 import Text.Show (Show(..))
21 import qualified Data.Char as Char
22 import qualified Data.Text as Text
23 import qualified Data.Text.Lazy as TL
24 import qualified Data.Text.Lazy.Builder as Builder
25 import qualified Text.Megaparsec as P
26
27 import Language.TCT.Token
28 import Language.TCT.Elem
29 import Language.TCT.Read.Elem
30
31 textOf :: Buildable a => a -> Text
32 textOf = TL.toStrict . Builder.toLazyText . build
33
34 -- * Type 'Groups'
35 type Groups = (Token,[(Group,Token)])
36
37 openGroup :: Group -> Groups -> Groups
38 openGroup g (t,ms) = (t,(g,mempty):ms)
39
40 groupToken :: Token -> Groups -> Groups
41 groupToken mrk (t,[]) = (t<>mrk,[])
42 groupToken mrk (t,(g0,m0):gs) = (t,(g0,m0<>mrk):gs)
43
44 closeGroup :: Group -> Groups -> Groups
45 closeGroup g (t,[]) = (t<>TokenPlain (snd $ groupBorders g mempty),[])
46 closeGroup g (t,(g1,m1):ms) =
47 case (g,g1) of
48 (GroupElem x ax, GroupElem y ay) | x == y ->
49 groupToken (TokenGroup (GroupElem x (ax<>ay)) m1) (t,ms)
50 (x,y) | x == y -> groupToken (TokenGroup g1 m1) (t,ms)
51 _ ->
52 closeGroup g $
53 groupToken
54 (TokenPlain (fst $ groupBorders g1 mempty) <> m1)
55 (t,ms)
56
57 closeGroups :: Groups -> Token
58 closeGroups grps =
59 let (m0,gs) = appendLexeme (LexemeWhite "") grps in
60 foldr (\(g,t) acc ->
61 acc <> TokenPlain (fst $ groupBorders g mempty) <> t) m0 gs
62
63 -- * Type 'Lexeme'
64 data Lexeme
65 = LexemeGroupOpen Group
66 | LexemeGroupClose Group
67 | LexemeGroupPlain Char
68 | LexemeWhite Text
69 | LexemeWord Text
70 | LexemeToken Token
71 | LexemeEscape Char
72 | LexemeLink Text
73 deriving (Show, Eq)
74
75 appendLexeme :: Lexeme -> Groups -> Groups
76 appendLexeme lex gs =
77 case dbg "appendLexeme" lex of
78 _ | (tok,(GroupHash,tag):gs') <- gs
79 , (case lex of
80 LexemeWord{} -> False
81 LexemeEscape{} -> False
82 LexemeGroupClose GroupHash -> False
83 _ -> True) ->
84 appendLexeme lex $
85 groupToken (TokenTag (textOf tag)) (tok,gs')
86 LexemeGroupOpen g -> openGroup g gs
87 LexemeGroupClose g -> closeGroup g gs
88 LexemeGroupPlain c -> groupToken (TokenPlain (Text.singleton c)) gs
89 LexemeWhite wh -> groupToken (TokenPlain wh) gs
90 LexemeWord wo -> groupToken (TokenPlain wo) gs
91 LexemeToken tok -> groupToken tok gs
92 LexemeEscape c -> groupToken (TokenEscape c) gs
93 LexemeLink lnk -> groupToken (TokenLink lnk) gs
94
95 appendLexemes :: Groups -> [Lexeme] -> Groups
96 appendLexemes = foldl' (flip appendLexeme)
97
98 -- * Parsers
99
100 p_Token :: Parser e s Token
101 p_Token = closeGroups <$> p_Groups (mempty,[])
102
103 p_Groups :: Groups -> Parser e s Groups
104 p_Groups gs = pdbg "Groups" $
105 (<|>)
106 (p_Lexemes >>= p_Groups . appendLexemes gs)
107 (P.eof $> gs)
108
109 p_Lexemes :: Parser e s [Lexeme]
110 p_Lexemes = pdbg "Lexemes" $
111 P.choice
112 [ P.try p_Escape
113 , P.try p_Elem
114 , P.try $ pure <$> p_Link
115 , P.try $ (<>) <$> ({-pure <$> P.try p_Link <|>-} P.some p_GroupClose)
116 <*> (pure <$> p_White <|> P.eof $> [])
117 , P.try $ (:) <$> p_White
118 <*> ({-pure <$> P.try p_Link <|>-} P.some p_GroupOpen)
119 , P.try $ pure <$> (p_PunctOrSym >>= \c ->
120 P.option (LexemeGroupPlain c) $
121 p_GroupOpenOrClose c)
122 , P.try $ pure <$> p_White
123 , pure . LexemeWord <$> p_Word
124 ]
125
126 p_White :: Parser e s Lexeme
127 p_White = pdbg "White" $
128 LexemeWhite <$> p_Spaces
129
130 p_PunctOrSym :: Parser e s Char
131 p_PunctOrSym = P.satisfy $ \c -> Char.isPunctuation c || Char.isSymbol c
132
133 p_GroupOpenOrClose :: Char -> Parser e s Lexeme
134 p_GroupOpenOrClose c = pdbg "GroupOpenClose" $ do
135 case c of
136 '(' -> return $ LexemeGroupOpen GroupParen
137 '[' -> return $ LexemeGroupOpen GroupBracket
138 '{' -> return $ LexemeGroupOpen GroupBrace
139 '«' -> return $ LexemeGroupOpen GroupFrenchquote
140 ')' -> return $ LexemeGroupClose GroupParen
141 ']' -> return $ LexemeGroupClose GroupBracket
142 '}' -> return $ LexemeGroupClose GroupBrace
143 '»' -> return $ LexemeGroupClose GroupFrenchquote
144 _ -> fail "GroupOpenOrClose"
145
146 p_GroupOpen :: Parser e s Lexeme
147 p_GroupOpen = pdbg "GroupOpen" $ do
148 c <- p_PunctOrSym
149 case c of
150 '/' -> open GroupSlash
151 '-' -> open GroupDash
152 '"' -> open GroupDoublequote
153 '\'' -> open GroupSinglequote
154 '`' -> open GroupBackquote
155 '_' -> open GroupUnderscore
156 '*' -> open GroupStar
157 '#' -> open GroupHash
158 _ -> p_GroupOpenOrClose c
159 where
160 open = return . LexemeGroupOpen
161
162 p_GroupClose :: Parser e s Lexeme
163 p_GroupClose = pdbg "GroupClose" $ do
164 c <- p_PunctOrSym
165 case c of
166 '/' -> close GroupSlash
167 '-' -> close GroupDash
168 '"' -> close GroupDoublequote
169 '\'' -> close GroupSinglequote
170 '`' -> close GroupBackquote
171 '_' -> close GroupUnderscore
172 '*' -> close GroupStar
173 '#' -> close GroupHash
174 _ -> p_GroupOpenOrClose c
175 where
176 close = return . LexemeGroupClose
177
178 p_Link :: Parser e s Lexeme
179 p_Link =
180 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
181 <$> P.option "" (P.try p_scheme)
182 <*> P.string "//"
183 <*> p_addr
184 where
185 p_scheme =
186 (<>)
187 <$> P.some (
188 P.satisfy $ \c ->
189 Char.isPrint c &&
190 not (Char.isSpace c) &&
191 c/='/' &&
192 c/=':')
193 <*> P.string ":"
194 p_addr =
195 P.many $
196 P.satisfy $ \c ->
197 Char.isPrint c &&
198 not (Char.isSpace c) &&
199 c/='<' &&
200 c/='>'
201
202 p_Escape :: Parser e s [Lexeme]
203 p_Escape =
204 pure . LexemeEscape
205 <$ P.char '\\'
206 <*> P.satisfy Char.isPrint
207
208 p_Elem :: Parser e s [Lexeme]
209 p_Elem = pdbg "Elem" $
210 P.char '<' >> (p_close <|> p_open)
211 where
212 p_open =
213 (\e as oc ->
214 case oc of
215 True -> [ LexemeGroupOpen $ GroupElem e as
216 , LexemeToken $ Tokens mempty -- same elem for open and close
217 , LexemeGroupClose $ GroupElem e [] ]
218 False -> [LexemeGroupOpen $ GroupElem e as])
219 <$> p_Word
220 <*> p_Attrs
221 <*> P.option False (True <$ P.char '/')
222 <* P.char '>'
223 p_close =
224 (\e -> [LexemeGroupClose $ GroupElem e []])
225 <$ P.char '/'
226 <*> p_Word
227 <* P.char '>'