]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Token.hs
Fix TokenTag parsing.
[doclang.git] / Language / TCT / Read / Token.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
6
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
9 import Data.Bool
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.), flip)
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..), fromMaybe)
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Text.Show (Show(..))
23 import qualified Data.Char as Char
24 import qualified Data.Sequence as Seq
25 import qualified Data.Text as Text
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Builder as Builder
28 import qualified Text.Megaparsec as P
29
30 import Language.TCT.Token
31 import Language.TCT.Elem
32 import Language.TCT.Read.Elem
33
34 textOf :: Buildable a => a -> Text
35 textOf = TL.toStrict . Builder.toLazyText . build
36
37 -- * Type 'Groups'
38 type Groups = (Token,[(Group,Token)])
39
40 openGroup :: Group -> Groups -> Groups
41 openGroup g (t,ms) = (t,(g,mempty):ms)
42
43 insertToken :: Token -> Groups -> Groups
44 insertToken tok (t,[]) = (t<>tok,[])
45 insertToken tok (t,(g0,t0):gs) = (t,(g0,t0<>tok):gs)
46
47 closeGroup :: Group -> Groups -> Groups
48 closeGroup g (t,[]) = dbg "closeGroup" $ (t<>TokenPlain (snd $ groupBorders g mempty),[])
49 closeGroup g (t,(g1,m1):ms) = dbg "closeGroup" $
50 case (g,g1) of
51 (GroupElem x ax, GroupElem y ay) | x == y ->
52 insertToken (TokenGroup (GroupElem x (ax<>ay)) m1) (t,ms)
53 (x,y) | x == y -> insertToken (TokenGroup g1 m1) (t,ms)
54 _ ->
55 closeGroup g $
56 insertToken
57 (TokenPlain (fst $ groupBorders g1 mempty) <> m1)
58 (t,ms)
59
60 closeGroups :: Groups -> Token
61 closeGroups (t0,gs) = dbg "closeGroups" $
62 t0 <>
63 foldl' (\acc (g,t) ->
64 case g of
65 -- NOTE: try to close 'GroupHash' as 'TokenTag' instead of 'TokenPlain'.
66 GroupHash | TokenPlain p:<toks <- Seq.viewl $ unTokens $ t <> acc ->
67 case Text.findIndex (not . isTagChar) p of
68 Just 0 -> TokenPlain (fst $ groupBorders g mempty) <> t <> acc
69 Just i ->
70 let (tag,p') = Text.splitAt i p in
71 Tokens $ TokenTag tag<|TokenPlain p'<|toks
72 Nothing ->
73 Tokens $ TokenTag p<|toks
74 _ -> TokenPlain (fst $ groupBorders g mempty) <> t <> acc
75 ) mempty gs
76 where
77 isTagChar c =
78 Char.isAlphaNum c ||
79 case Char.generalCategory c of
80 Char.DashPunctuation -> True
81 Char.ConnectorPunctuation -> True
82 _ -> False
83
84 -- * Type 'Lexeme'
85 data Lexeme
86 = LexemeGroupOpen Group
87 | LexemeGroupClose Group
88 | LexemeGroupPlain Char
89 | LexemeWhite Text
90 | LexemeWord Text
91 | LexemeToken Token
92 | LexemeEscape Char
93 | LexemeLink Text
94 deriving (Show, Eq)
95
96 appendLexeme :: Lexeme -> Groups -> Groups
97 appendLexeme lex gs =
98 dbg "appendLexeme:" $
99 case dbg "appendLexeme:" lex of
100 LexemeGroupOpen g -> openGroup g gs
101 LexemeGroupClose g -> closeGroup g gs
102 LexemeGroupPlain c -> insertToken (TokenPlain (Text.singleton c)) gs
103 LexemeWhite wh -> insertToken (TokenPlain wh) gs
104 LexemeWord wo -> insertToken (TokenPlain wo) gs
105 LexemeToken tok -> insertToken tok gs
106 LexemeEscape c -> insertToken (TokenEscape c) gs
107 LexemeLink lnk -> insertToken (TokenLink lnk) gs
108
109 appendLexemes :: Groups -> [Lexeme] -> Groups
110 appendLexemes = foldl' (flip appendLexeme)
111
112 -- * Parsers
113
114 p_Token :: Parser e s Token
115 p_Token = closeGroups <$> p_Groups (mempty,[])
116
117 p_Groups :: Groups -> Parser e s Groups
118 p_Groups gs = pdbg "Groups" $
119 (<|>)
120 (p_Lexemes (mempty == gs) >>= p_Groups . appendLexemes gs)
121 (P.eof $> gs)
122
123 p_Lexemes :: Bool -> Parser e s [Lexeme]
124 p_Lexemes isBOF = pdbg "Lexemes" $
125 P.choice
126 [ P.try $ p_GroupClose
127 , P.try $ p_GroupOpen isBOF
128 , P.try $ p_GroupOpenOrClose l_GroupOpenOrClose
129 , P.try $ pure <$> p_White
130 , pure . LexemeWord <$> p_Word
131 ]
132
133 p_White :: Parser e s Lexeme
134 p_White = pdbg "White" $
135 LexemeWhite <$> p_Spaces
136
137 p_PunctOrSym :: Parser e s Char
138 p_PunctOrSym = P.satisfy $ \c -> Char.isPunctuation c || Char.isSymbol c
139
140 p_GroupOpen :: Bool -> Parser e s [Lexeme]
141 p_GroupOpen isBOF = pdbg "GroupOpen" $ do
142 wh <- if isBOF then return [] else pure <$> p_White
143 ps <- mconcat <$> P.some (P.try $ p_GroupOpenOrClose $ l_GroupOpenAndClose LexemeGroupOpen)
144 return $ wh<>ps
145
146 p_GroupClose :: Parser e s [Lexeme]
147 p_GroupClose = pdbg "GroupClose" $ do
148 ps <- mconcat <$> P.some (P.try $ p_GroupOpenOrClose $ l_GroupOpenAndClose LexemeGroupClose)
149 wh <- pure <$> p_White <|> P.eof $> []
150 return $ ps<>wh
151
152 p_GroupOpenOrClose :: (Char -> Maybe Lexeme) -> Parser e s [Lexeme]
153 p_GroupOpenOrClose grp = pdbg "GroupOpenOrClose" $ do
154 P.choice
155 [ P.try p_Elem
156 , P.try (pure <$> p_Escape)
157 , P.try (pure <$> p_Link)
158 , (<$> p_PunctOrSym) $ \c ->
159 pure $
160 LexemeGroupPlain c `fromMaybe`
161 grp c
162 ]
163
164 l_GroupOpenAndClose :: (Group -> Lexeme) -> Char -> Maybe Lexeme
165 l_GroupOpenAndClose lxm c = dbg "GroupOpenAndClose" $
166 case c of
167 '/' -> Just $ lxm GroupSlash
168 '-' -> Just $ lxm GroupDash
169 '"' -> Just $ lxm GroupDoublequote
170 '\'' -> Just $ lxm GroupSinglequote
171 '`' -> Just $ lxm GroupBackquote
172 '_' -> Just $ lxm GroupUnderscore
173 '*' -> Just $ lxm GroupStar
174 '#' -> Just $ lxm GroupHash
175 _ -> l_GroupOpenOrClose c
176
177 l_GroupOpenOrClose :: Char -> Maybe Lexeme
178 l_GroupOpenOrClose c = dbg "GroupOpenOrClose" $
179 case c of
180 '(' -> Just $ LexemeGroupOpen GroupParen
181 '[' -> Just $ LexemeGroupOpen GroupBracket
182 '{' -> Just $ LexemeGroupOpen GroupBrace
183 '«' -> Just $ LexemeGroupOpen GroupFrenchquote
184 ')' -> Just $ LexemeGroupClose GroupParen
185 ']' -> Just $ LexemeGroupClose GroupBracket
186 '}' -> Just $ LexemeGroupClose GroupBrace
187 '»' -> Just $ LexemeGroupClose GroupFrenchquote
188 '#' -> Just $ LexemeGroupOpen GroupHash
189 _ -> Nothing
190
191 p_Link :: Parser e s Lexeme
192 p_Link =
193 (\scheme ss addr -> LexemeLink $ Text.pack $ scheme <> ss <> addr)
194 <$> P.option "" (P.try p_scheme)
195 <*> P.string "//"
196 <*> p_addr
197 where
198 p_scheme =
199 (<>)
200 <$> P.some (P.satisfy $ \c ->
201 Char.isAlphaNum c
202 || c=='_'
203 || c=='-'
204 || c=='+')
205 <*> P.string ":"
206 p_addr =
207 P.many $
208 P.satisfy $ \c ->
209 Char.isAlphaNum c
210 || c=='%'
211 || c=='/'
212 || c=='('
213 || c==')'
214 || c=='-'
215 || c=='_'
216 || c=='.'
217
218 p_Escape :: Parser e s Lexeme
219 p_Escape =
220 LexemeEscape
221 <$ P.char '\\'
222 <*> P.satisfy Char.isPrint
223
224 p_Elem :: Parser e s [Lexeme]
225 p_Elem = pdbg "Elem" $ P.char '<' >> (p_close <|> p_open)
226 where
227 p_open =
228 (\e as oc ->
229 case oc of
230 True -> [ LexemeGroupOpen $ GroupElem e as
231 , LexemeToken $ Tokens mempty -- same elem for open and close
232 , LexemeGroupClose $ GroupElem e [] ]
233 False -> [LexemeGroupOpen $ GroupElem e as])
234 <$> p_Word
235 <*> p_Attrs
236 <*> P.option False (True <$ P.char '/')
237 <* P.char '>'
238 p_close =
239 (\e -> [LexemeGroupClose $ GroupElem e []])
240 <$ P.char '/'
241 <*> p_Word
242 <* P.char '>'