]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Token.hs
Add HTML5 rendition of DTC.Index.
[doclang.git] / Language / TCT / Read / Token.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 {-# LANGUAGE ViewPatterns #-}
6 module Language.TCT.Read.Token where
7
8 import Control.Applicative (Applicative(..), Alternative(..))
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), ($>), (<$))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.Sequence (ViewL(..), (<|))
20 import Data.Text (Text)
21 import Data.Text.Buildable (Buildable(..))
22 import Data.Tuple (fst,snd)
23 import Prelude (Num(..))
24 import Text.Show (Show(..))
25 import qualified Data.Char as Char
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text as Text
28 import qualified Data.Text.Lazy as TL
29 import qualified Data.Text.Lazy.Builder as Builder
30 import qualified Text.Megaparsec as P
31 import qualified Text.Megaparsec.Char as P
32
33 import Language.TCT.Token
34 import Language.TCT.Cell
35 import Language.TCT.Elem
36 import Language.TCT.Read.Elem
37 import Language.TCT.Read.Cell
38
39 textOf :: Buildable a => a -> Text
40 textOf = TL.toStrict . Builder.toLazyText . build
41
42 -- * Type 'Pairs'
43 type Pairs = (Tokens,[(Cell Pair,Tokens)])
44
45 appendToken :: Pairs -> Cell Token -> Pairs
46 appendToken ps = appendTokens ps . Seq.singleton
47
48 appendTokens :: Pairs -> Tokens -> Pairs
49 appendTokens (t,[]) toks = (t<>toks,[])
50 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
51
52 openPair :: Pairs -> Cell Pair -> Pairs
53 openPair (t,ms) p = (t,(p,mempty):ms)
54
55 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
56 closePair :: Pairs -> Cell Pair -> Pairs
57 closePair ps@(_,[]) (Cell bp ep p) = dbg "closePair" $
58 appendToken ps $
59 Cell bp ep $
60 TokenPlain $ snd $ pairBorders p tokensPlainEmpty
61 closePair (t,(p1,t1):ts) p = dbg "closePair" $
62 case (p1,p) of
63 (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y ->
64 appendToken (t,ts) $
65 Cell bx ey $
66 TokenPair (PairElem x (ax<>ay)) t1
67 (Cell bx _ex x, Cell _by ey y) | x == y ->
68 appendToken (t,ts) $
69 Cell bx ey $
70 TokenPair x t1
71 _ ->
72 (`closePair` p) $
73 appendTokens
74 (t,ts)
75 (closeUnpaired mempty (p1,t1))
76
77 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
78 closeUnpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens
79 closeUnpaired acc (Cell bp ep p,toks) = dbg "closeUnpaired" $
80 case p of
81 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
82 PairHash | (Cell bt et (TokenPlain t)) :< ts <- Seq.viewl $ toks <> acc ->
83 case Text.findIndex (not . isTagChar) t of
84 -- Just 0 -> toksHash mempty <> toks <> acc
85 Just i ->
86 Cell bp bt{columnPos = columnPos bt + i} (TokenTag tag)
87 <| Cell bt{columnPos = columnPos bt + i + 1} et (TokenPlain t')
88 <| ts
89 where (tag,t') = Text.splitAt i t
90 Nothing | Text.null t -> toksHash mempty <> toks <> acc
91 Nothing -> Cell bp et (TokenTag t) <| ts
92 _ -> toksHash tokensPlainEmpty <> toks <> acc
93 where
94 toksHash = tokens1 . Cell bp ep . TokenPlain . fst . pairBorders p
95 isTagChar c =
96 Char.isAlphaNum c ||
97 c=='·' ||
98 case Char.generalCategory c of
99 Char.DashPunctuation -> True
100 Char.ConnectorPunctuation -> True
101 _ -> False
102
103 -- | Close remaining 'Pair's at end of parsing.
104 closePairs :: Pairs -> Tokens
105 closePairs (t0,ps) = dbg "closePairs" $
106 t0 <> foldl' closeUnpaired mempty ps
107
108 appendLexeme :: Lexeme -> Pairs -> Pairs
109 appendLexeme lex acc =
110 dbg "appendLexeme" $
111 case dbg "appendLexeme" lex of
112 LexemePairOpen ps -> foldl' open acc ps
113 where
114 open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Cell ep ep $ TokenPlain "")
115 open a p = openPair a p
116 LexemePairClose ps -> foldl' closePair acc ps
117 LexemePairAny ps -> appendTokens acc $ tokens $ ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps
118 LexemePairBoth ps -> appendTokens acc $ tokens $ ((`TokenPair`mempty) <$>) <$> ps
119 LexemeEscape c -> appendToken acc $ TokenEscape <$> c
120 LexemeLink t -> appendToken acc $ TokenLink <$> t
121 LexemeWhite (unCell -> "") -> acc
122 LexemeWhite cs -> appendToken acc $ TokenPlain <$> cs
123 LexemeAlphaNum cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
124 LexemeAny cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
125 LexemeToken ts -> appendTokens acc ts
126
127 -- * Type 'Lexeme'
128 data Lexeme
129 = LexemePairOpen ![Cell Pair]
130 | LexemePairClose ![Cell Pair]
131 | LexemePairAny ![Cell Pair]
132 | LexemePairBoth ![Cell Pair]
133 | LexemeEscape !(Cell Char)
134 | LexemeLink !(Cell Text)
135 | LexemeWhite !(Cell White)
136 | LexemeAlphaNum !(Cell [Char])
137 | LexemeAny !(Cell [Char])
138 | LexemeToken !Tokens
139 deriving (Eq, Show)
140
141 p_Tokens :: Parser e s Tokens
142 p_Tokens = pdbg "Tokens" $
143 closePairs .
144 foldr appendLexeme mempty .
145 dbg "Lexemes" .
146 mangleLexemes .
147 (LexemeWhite (cell0 "") :) <$>
148 go [LexemeWhite (cell0 "")]
149 where
150 go :: [Lexeme] -> Parser e s [Lexeme]
151 go acc =
152 (P.eof $> acc) <|>
153 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
154
155 mangleLexemes = \case
156 LexemeAny (Cell _bx ex x):LexemeAny (Cell by _ey y):acc -> LexemeAny (Cell by ex (x<>y)):acc
157
158 -- "   
159 w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
160 --    "
161 LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
162
163 --    ,,,"
164 LexemePairAny p:a@LexemeAny{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
165 -- ",,,   
166 w@LexemeWhite{}:a@LexemeAny{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
167
168 -- ",,,AAA
169 an@LexemeAlphaNum{}:a@LexemeAny{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
170 -- ,,,"AAA
171 an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeAny{}:acc -> an:LexemePairOpen p:a:acc
172
173 -- ")
174 c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
175 -- ("
176 LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
177
178 acc -> acc
179
180 pairAny :: Char -> Maybe Pair
181 pairAny = \case
182 '-' -> Just PairDash
183 '/' -> Just PairSlash
184 '"' -> Just PairDoublequote
185 '\'' -> Just PairSinglequote
186 '`' -> Just PairBackquote
187 '_' -> Just PairUnderscore
188 '*' -> Just PairStar
189 '#' -> Just PairHash
190 _ -> Nothing
191
192 pairOpen :: Char -> Maybe Pair
193 pairOpen = \case
194 '(' -> Just PairParen
195 '[' -> Just PairBracket
196 '{' -> Just PairBrace
197 '«' -> Just PairFrenchquote
198 _ -> Nothing
199
200 pairClose :: Char -> Maybe Pair
201 pairClose = \case
202 ')' -> Just PairParen
203 ']' -> Just PairBracket
204 '}' -> Just PairBrace
205 '»' -> Just PairFrenchquote
206 _ -> Nothing
207
208 p_Cell :: Parser e s a -> Parser e s (Cell a)
209 p_Cell pa = do
210 bp <- p_Position
211 a <- pa
212 ep <- p_Position
213 return $ Cell bp ep a
214
215 p_Lexeme :: Parser e s Lexeme
216 p_Lexeme = pdbg "Lexeme" $
217 P.choice
218 [ P.try $ LexemeWhite <$> p_Cell p_Spaces
219 , P.try $ LexemePairAny <$> P.some (p_Cell $ p_satisfyMaybe pairAny)
220 , P.try $ LexemePairBoth <$> P.some (P.try $ p_Cell p_ElemSingle)
221 , P.try $ LexemePairOpen <$> P.some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
222 , P.try $ LexemePairClose <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
223 , P.try $ LexemeEscape <$> p_Cell p_Escape
224 , P.try $ LexemeLink <$> p_Cell p_Link
225 , P.try $ LexemeAlphaNum <$> p_Cell (P.some p_AlphaNum)
226 , LexemeAny <$> p_Cell (pure <$> P.anyChar)
227 ]
228
229 p_AlphaNum :: Parser e s Char
230 p_AlphaNum = P.satisfy Char.isAlphaNum
231
232 p_Escape :: Parser e s Char
233 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
234
235 p_Link :: Parser e s Text
236 p_Link =
237 (\scheme addr -> Text.pack $ scheme <> "//" <> addr)
238 <$> P.option "" (P.try p_scheme)
239 <* P.string "//"
240 <*> p_addr
241 where
242 p_scheme =
243 (<> ":")
244 <$> P.some (P.satisfy $ \c ->
245 Char.isAlphaNum c
246 || c=='_'
247 || c=='-'
248 || c=='+')
249 <* P.char ':'
250 p_addr =
251 P.many $
252 P.satisfy $ \c ->
253 Char.isAlphaNum c
254 || c=='%'
255 || c=='/'
256 || c=='('
257 || c==')'
258 || c=='-'
259 || c=='_'
260 || c=='.'
261 || c=='#'
262 || c=='?'
263 || c=='='
264
265 p_ElemSingle :: Parser e s Pair
266 p_ElemSingle = pdbg "ElemSingle" $
267 PairElem
268 <$ P.char '<'
269 <*> p_Word
270 <*> p_Attrs
271 <* P.string "/>"
272
273 p_ElemOpen :: Parser e s Pair
274 p_ElemOpen = pdbg "ElemOpen" $
275 PairElem
276 <$ P.char '<'
277 <*> p_Word
278 <*> p_Attrs
279 <* P.char '>'
280
281 p_ElemClose :: Parser e s Pair
282 p_ElemClose = pdbg "ElemClose" $
283 (`PairElem` [])
284 <$ P.string "</"
285 <*> p_Word
286 <* P.char '>'
287
288 {-
289 p_ElemOpenOrSingle :: Parser e s Pair
290 p_ElemOpenOrSingle =
291 p_ElemOpen >>= \p ->
292 P.char '>' $> LexemePairOpen p <|>
293 P.string "/>" $> LexemePairAny p
294 -}