]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Token.hs
Add Xmlify.
[doclang.git] / Language / TCT / Read / Token.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 {-# LANGUAGE ViewPatterns #-}
6 module Language.TCT.Read.Token where
7
8 import Control.Applicative (Applicative(..), Alternative(..))
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), ($>), (<$))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.Sequence (ViewL(..), (<|))
20 import Data.Text (Text)
21 import Data.Text.Buildable (Buildable(..))
22 import Data.Tuple (fst,snd)
23 import Prelude (Num(..))
24 import Text.Show (Show(..))
25 import qualified Data.Char as Char
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text as Text
28 import qualified Data.Text.Lazy as TL
29 import qualified Data.Text.Lazy.Builder as Builder
30 import qualified Text.Megaparsec as P
31 import qualified Text.Megaparsec.Char as P
32
33 import Language.TCT.Token
34 import Language.TCT.Cell
35 import Language.TCT.Elem
36 import Language.TCT.Read.Elem
37 import Language.TCT.Read.Cell
38
39 textOf :: Buildable a => a -> Text
40 textOf = TL.toStrict . Builder.toLazyText . build
41
42 -- * Type 'Pairs'
43 type Pairs = (Tokens,[(Cell Pair,Tokens)])
44
45 appendToken :: Pairs -> Cell Token -> Pairs
46 appendToken ps = appendTokens ps . Seq.singleton
47
48 appendTokens :: Pairs -> Tokens -> Pairs
49 appendTokens (t,[]) toks = (t<>toks,[])
50 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
51
52 openPair :: Pairs -> Cell Pair -> Pairs
53 openPair (t,ms) p = (t,(p,mempty):ms)
54
55 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
56 closePair :: Pairs -> Cell Pair -> Pairs
57 closePair ps@(_,[]) (Cell bp ep p) = dbg "closePair" $
58 appendToken ps $
59 Cell bp ep $
60 TokenPlain $ snd $ pairBorders p tokensPlainEmpty
61 closePair (t,(p1,t1):ts) p = dbg "closePair" $
62 case (p1,p) of
63 (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y ->
64 appendToken (t,ts) $
65 Cell bx ey $
66 TokenPair (PairElem x (ax<>ay)) t1
67 (Cell bx _ex x, Cell _by ey y) | x == y ->
68 appendToken (t,ts) $
69 Cell bx ey $
70 TokenPair x t1
71 _ ->
72 (`closePair` p) $
73 appendTokens
74 (t,ts)
75 (closeUnpaired mempty (p1,t1))
76
77 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
78 closeUnpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens
79 closeUnpaired acc (Cell bp ep p,toks) = dbg "closeUnpaired" $
80 case p of
81 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
82 PairHash | (Cell bt et (TokenPlain t)) :< ts <- Seq.viewl $ toks <> acc ->
83 case Text.findIndex (not . isTagChar) t of
84 -- Just 0 -> toksHash mempty <> toks <> acc
85 Just i ->
86 Cell bp bt{columnPos = columnPos bt + i} (TokenTag tag)
87 <| Cell bt{columnPos = columnPos bt + i + 1} et (TokenPlain t')
88 <| ts
89 where (tag,t') = Text.splitAt i t
90 Nothing | Text.null t -> toksHash mempty <> toks <> acc
91 Nothing -> Cell bp et (TokenTag t) <| ts
92 _ -> toksHash tokensPlainEmpty <> toks <> acc
93 where
94 toksHash = tokens1 . Cell bp ep . TokenPlain . fst . pairBorders p
95 isTagChar c =
96 Char.isAlphaNum c ||
97 c=='·' ||
98 case Char.generalCategory c of
99 Char.DashPunctuation -> True
100 Char.ConnectorPunctuation -> True
101 _ -> False
102
103 -- | Close remaining 'Pair's at end of parsing.
104 closePairs :: Pairs -> Tokens
105 closePairs (t0,ps) = dbg "closePairs" $
106 t0 <> foldl' closeUnpaired mempty ps
107
108 appendLexeme :: Lexeme -> Pairs -> Pairs
109 appendLexeme lex acc =
110 dbg "appendLexeme" $
111 case lex of
112 LexemePairOpen ps -> foldl' open acc ps
113 where
114 open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Cell ep ep $ TokenPlain "")
115 open a p = openPair a p
116 LexemePairClose ps -> foldl' closePair acc ps
117 LexemePairAny ps -> appendTokens acc $ tokens $ ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps
118 LexemePairBoth ps -> appendTokens acc $ tokens $ ((`TokenPair`mempty) <$>) <$> ps
119 LexemeEscape c -> appendToken acc $ TokenEscape <$> c
120 LexemeLink t -> appendToken acc $ TokenLink <$> t
121 LexemeWhite (unCell -> "") -> acc
122 LexemeWhite cs -> appendToken acc $ TokenPlain <$> cs
123 LexemeAlphaNum cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
124 LexemeAny cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
125 LexemeToken ts -> appendTokens acc ts
126
127 -- * Type 'Lexeme'
128 data Lexeme
129 = LexemePairOpen ![Cell Pair]
130 | LexemePairClose ![Cell Pair]
131 | LexemePairAny ![Cell Pair]
132 | LexemePairBoth ![Cell Pair]
133 | LexemeEscape !(Cell Char)
134 | LexemeLink !(Cell Text)
135 | LexemeWhite !(Cell White)
136 | LexemeAlphaNum !(Cell [Char])
137 | LexemeAny !(Cell [Char])
138 | LexemeToken !Tokens
139 deriving (Eq, Show)
140
141 p_Tokens :: Parser e s Tokens
142 p_Tokens = pdbg "Tokens" $
143 closePairs .
144 foldr appendLexeme mempty .
145 dbg "Lexemes" .
146 mangleLexemes .
147 (LexemeWhite (cell0 "") :) <$>
148 go [LexemeWhite (cell0 "")]
149 where
150 go :: [Lexeme] -> Parser e s [Lexeme]
151 go acc =
152 (P.eof $> acc) <|>
153 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
154
155 mangleLexemes = \case
156 LexemeAny (Cell _bx ex x):LexemeAny (Cell by _ey y):acc -> LexemeAny (Cell by ex (x<>y)):acc
157
158 -- "   
159 w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
160 --    "
161 LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
162
163 --    ,,,"
164 LexemePairAny p:a@LexemeAny{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
165 -- ",,,   
166 w@LexemeWhite{}:a@LexemeAny{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
167
168 -- ",,,AAA
169 an@LexemeAlphaNum{}:a@LexemeAny{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
170 -- ,,,"AAA
171 an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeAny{}:acc -> an:LexemePairOpen p:a:acc
172
173 -- ")
174 c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
175 -- ("
176 LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
177
178 -- "(
179 o@LexemePairOpen{}:LexemePairAny p:acc -> o:LexemePairClose p:acc
180 -- )"
181 LexemePairAny p:c@LexemePairClose{}:acc -> c:LexemePairClose p:acc
182
183 acc -> acc
184
185 pairAny :: Char -> Maybe Pair
186 pairAny = \case
187 '-' -> Just PairDash
188 '/' -> Just PairSlash
189 '"' -> Just PairDoublequote
190 '\'' -> Just PairSinglequote
191 '`' -> Just PairBackquote
192 '_' -> Just PairUnderscore
193 '*' -> Just PairStar
194 '#' -> Just PairHash
195 _ -> Nothing
196
197 pairOpen :: Char -> Maybe Pair
198 pairOpen = \case
199 '(' -> Just PairParen
200 '[' -> Just PairBracket
201 '{' -> Just PairBrace
202 '«' -> Just PairFrenchquote
203 _ -> Nothing
204
205 pairClose :: Char -> Maybe Pair
206 pairClose = \case
207 ')' -> Just PairParen
208 ']' -> Just PairBracket
209 '}' -> Just PairBrace
210 '»' -> Just PairFrenchquote
211 _ -> Nothing
212
213 p_Cell :: Parser e s a -> Parser e s (Cell a)
214 p_Cell pa = do
215 bp <- p_Position
216 a <- pa
217 ep <- p_Position
218 return $ Cell bp ep a
219
220 p_Lexeme :: Parser e s Lexeme
221 p_Lexeme = pdbg "Lexeme" $
222 P.choice
223 [ P.try $ LexemeWhite <$> p_Cell p_Spaces
224 , P.try $ LexemePairAny <$> P.some (p_Cell $ p_satisfyMaybe pairAny)
225 , P.try $ LexemePairBoth <$> P.some (P.try $ p_Cell p_ElemSingle)
226 , P.try $ LexemePairOpen <$> P.some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
227 , P.try $ LexemePairClose <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
228 , P.try $ LexemeEscape <$> p_Cell p_Escape
229 , P.try $ LexemeLink <$> p_Cell p_Link
230 , P.try $ LexemeAlphaNum <$> p_Cell (P.some p_AlphaNum)
231 , LexemeAny <$> p_Cell (pure <$> P.anyChar)
232 ]
233
234 p_AlphaNum :: Parser e s Char
235 p_AlphaNum = P.satisfy Char.isAlphaNum
236
237 p_Escape :: Parser e s Char
238 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
239
240 p_Link :: Parser e s Text
241 p_Link =
242 P.try (P.char '<' *> p <* P.char '>') <|>
243 p
244 where
245 p =
246 (\scheme addr -> Text.pack $ scheme <> "//" <> addr)
247 <$> P.option "" (P.try p_scheme)
248 <* P.string "//"
249 <*> p_addr
250 p_scheme =
251 (<> ":")
252 <$> P.some (P.satisfy $ \c ->
253 Char.isAlphaNum c
254 || c=='_'
255 || c=='-'
256 || c=='+')
257 <* P.char ':'
258 p_addr =
259 P.many $
260 P.satisfy $ \c ->
261 Char.isAlphaNum c
262 || c=='%'
263 || c=='/'
264 || c=='('
265 || c==')'
266 || c=='-'
267 || c=='_'
268 || c=='.'
269 || c=='#'
270 || c=='?'
271 || c=='='
272
273 p_ElemSingle :: Parser e s Pair
274 p_ElemSingle = pdbg "ElemSingle" $
275 PairElem
276 <$ P.char '<'
277 <*> p_Word
278 <*> p_Attrs
279 <* P.string "/>"
280
281 p_ElemOpen :: Parser e s Pair
282 p_ElemOpen = pdbg "ElemOpen" $
283 PairElem
284 <$ P.char '<'
285 <*> p_Word
286 <*> p_Attrs
287 <* P.char '>'
288
289 p_ElemClose :: Parser e s Pair
290 p_ElemClose = pdbg "ElemClose" $
291 (`PairElem` [])
292 <$ P.string "</"
293 <*> p_Word
294 <* P.char '>'
295
296 {-
297 p_ElemOpenOrSingle :: Parser e s Pair
298 p_ElemOpenOrSingle =
299 p_ElemOpen >>= \p ->
300 P.char '>' $> LexemePairOpen p <|>
301 P.string "/>" $> LexemePairAny p
302 -}