]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Token.hs
Use Tree for Token.
[doclang.git] / Language / TCT / Read / Token.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 {-# LANGUAGE ViewPatterns #-}
6 module Language.TCT.Read.Token where
7
8 import Control.Applicative (Applicative(..), Alternative(..))
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), ($>), (<$))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.Sequence (ViewL(..), (<|))
20 import Data.Text (Text)
21 -- import Data.Text.Buildable (Buildable(..))
22 import Data.TreeSeq.Strict (Tree(..))
23 import Data.Tuple (fst,snd)
24 import Prelude (Num(..))
25 import Text.Show (Show(..))
26 import qualified Data.Char as Char
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text as Text
29 -- import qualified Data.Text.Lazy as TL
30 -- import qualified Data.Text.Lazy.Builder as Builder
31 import qualified Text.Megaparsec as P
32 import qualified Text.Megaparsec.Char as P
33
34 import Language.TCT.Token
35 import Language.TCT.Cell
36 import Language.TCT.Elem
37 import Language.TCT.Read.Elem
38 import Language.TCT.Read.Cell
39
40 {-
41 textOf :: Buildable a => a -> Text
42 textOf = TL.toStrict . Builder.toLazyText . build
43 -}
44
45 -- * Type 'Pairs'
46 type Pairs = (Tokens,[(Cell Pair,Tokens)])
47
48 appendToken :: Pairs -> Token -> Pairs
49 appendToken ps = appendTokens ps . Seq.singleton
50
51 appendTokens :: Pairs -> Tokens -> Pairs
52 appendTokens (t,[]) toks = (t<>toks,[])
53 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
54
55 openPair :: Pairs -> Cell Pair -> Pairs
56 openPair (t,ms) p = (t,(p,mempty):ms)
57
58 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
59 closePair :: Pairs -> Cell Pair -> Pairs
60 closePair ps@(_,[]) (Cell bp ep p) = dbg "closePair" $
61 appendToken ps $
62 Tree0 $ Cell bp ep $
63 TokenPlain $ snd $ pairBorders p tokensPlainEmpty
64 closePair (t,(p1,t1):ts) p = dbg "closePair" $
65 case (p1,p) of
66 (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y ->
67 appendToken (t,ts) $
68 TreeN (Cell bx ey $ PairElem x (ax<>ay)) t1
69 (Cell bx _ex x, Cell _by ey y) | x == y ->
70 appendToken (t,ts) $
71 TreeN (Cell bx ey x) t1
72 _ ->
73 (`closePair` p) $
74 appendTokens
75 (t,ts)
76 (closeUnpaired mempty (p1,t1))
77
78 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
79 closeUnpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens
80 closeUnpaired acc (Cell bp ep p,toks) = dbg "closeUnpaired" $
81 case p of
82 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
83 PairHash | (Tree0 (Cell bt et (TokenPlain t))) :< ts <- Seq.viewl $ toks <> acc ->
84 case Text.findIndex (not . isTagChar) t of
85 -- Just 0 -> toksHash mempty <> toks <> acc
86 Just i ->
87 Tree0 (Cell bp bt{columnPos = columnPos bt + i} (TokenTag tag))
88 <| Tree0 (Cell bt{columnPos = columnPos bt + i + 1} et (TokenPlain t'))
89 <| ts
90 where (tag,t') = Text.splitAt i t
91 Nothing | Text.null t -> toksHash mempty <> toks <> acc
92 Nothing -> Tree0 (Cell bp et (TokenTag t)) <| ts
93 _ -> toksHash tokensPlainEmpty <> toks <> acc
94 where
95 toksHash = tokens1 . Tree0 . Cell bp ep . TokenPlain . fst . pairBorders p
96 isTagChar c =
97 Char.isAlphaNum c ||
98 c=='·' ||
99 case Char.generalCategory c of
100 Char.DashPunctuation -> True
101 Char.ConnectorPunctuation -> True
102 _ -> False
103
104 -- | Close remaining 'Pair's at end of parsing.
105 closePairs :: Pairs -> Tokens
106 closePairs (t0,ps) = dbg "closePairs" $
107 t0 <> foldl' closeUnpaired mempty ps
108
109 appendLexeme :: Lexeme -> Pairs -> Pairs
110 appendLexeme lex acc =
111 dbg "appendLexeme" $
112 case lex of
113 LexemePairOpen ps -> foldl' open acc ps
114 where
115 open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Tree0 $ Cell ep ep $ TokenPlain "")
116 open a p = openPair a p
117 LexemePairClose ps -> foldl' closePair acc ps
118 LexemePairAny ps -> appendTokens acc $ tokens $ Tree0 . ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps
119 LexemePairBoth ps -> appendTokens acc $ tokens $ (`TreeN`mempty) <$> ps
120 LexemeEscape c -> appendToken acc $ Tree0 $ TokenEscape <$> c
121 LexemeLink t -> appendToken acc $ Tree0 $ TokenLink <$> t
122 LexemeWhite (unCell -> "") -> acc
123 LexemeWhite cs -> appendToken acc $ Tree0 $ TokenPlain <$> cs
124 LexemeAlphaNum cs -> appendToken acc $ Tree0 $ TokenPlain . Text.pack <$> cs
125 LexemeAny cs -> appendToken acc $ Tree0 $ TokenPlain . Text.pack <$> cs
126 LexemeToken ts -> appendTokens acc ts
127
128 -- * Type 'Lexeme'
129 data Lexeme
130 = LexemePairOpen ![Cell Pair]
131 | LexemePairClose ![Cell Pair]
132 | LexemePairAny ![Cell Pair]
133 | LexemePairBoth ![Cell Pair]
134 | LexemeEscape !(Cell Char)
135 | LexemeLink !(Cell Text)
136 | LexemeWhite !(Cell White)
137 | LexemeAlphaNum !(Cell [Char])
138 | LexemeAny !(Cell [Char])
139 | LexemeToken !Tokens
140 deriving (Eq, Show)
141
142 p_Tokens :: Parser e s Tokens
143 p_Tokens = pdbg "Tokens" $
144 closePairs .
145 foldr appendLexeme mempty .
146 dbg "Lexemes" .
147 mangleLexemes .
148 (LexemeWhite (cell0 "") :) <$>
149 go [LexemeWhite (cell0 "")]
150 where
151 go :: [Lexeme] -> Parser e s [Lexeme]
152 go acc =
153 (P.eof $> acc) <|>
154 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
155
156 mangleLexemes = \case
157 LexemeAny (Cell _bx ex x):LexemeAny (Cell by _ey y):acc -> LexemeAny (Cell by ex (x<>y)):acc
158
159 -- "   
160 w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
161 --    "
162 LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
163
164 --    ,,,"
165 LexemePairAny p:a@LexemeAny{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
166 -- ",,,   
167 w@LexemeWhite{}:a@LexemeAny{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
168
169 -- ",,,AAA
170 an@LexemeAlphaNum{}:a@LexemeAny{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
171 -- ,,,"AAA
172 an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeAny{}:acc -> an:LexemePairOpen p:a:acc
173
174 -- ")
175 c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
176 -- ("
177 LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
178
179 -- "(
180 o@LexemePairOpen{}:LexemePairAny p:acc -> o:LexemePairClose p:acc
181 -- )"
182 LexemePairAny p:c@LexemePairClose{}:acc -> c:LexemePairClose p:acc
183
184 acc -> acc
185
186 pairAny :: Char -> Maybe Pair
187 pairAny = \case
188 '-' -> Just PairDash
189 '/' -> Just PairSlash
190 '"' -> Just PairDoublequote
191 '\'' -> Just PairSinglequote
192 '`' -> Just PairBackquote
193 '_' -> Just PairUnderscore
194 '*' -> Just PairStar
195 '#' -> Just PairHash
196 _ -> Nothing
197
198 pairOpen :: Char -> Maybe Pair
199 pairOpen = \case
200 '(' -> Just PairParen
201 '[' -> Just PairBracket
202 '{' -> Just PairBrace
203 '«' -> Just PairFrenchquote
204 _ -> Nothing
205
206 pairClose :: Char -> Maybe Pair
207 pairClose = \case
208 ')' -> Just PairParen
209 ']' -> Just PairBracket
210 '}' -> Just PairBrace
211 '»' -> Just PairFrenchquote
212 _ -> Nothing
213
214 p_Cell :: Parser e s a -> Parser e s (Cell a)
215 p_Cell pa = do
216 bp <- p_Position
217 a <- pa
218 ep <- p_Position
219 return $ Cell bp ep a
220
221 p_Lexeme :: Parser e s Lexeme
222 p_Lexeme = pdbg "Lexeme" $
223 P.choice
224 [ P.try $ LexemeWhite <$> p_Cell p_Spaces
225 , P.try $ LexemePairAny <$> P.some (p_Cell $ p_satisfyMaybe pairAny)
226 , P.try $ LexemePairBoth <$> P.some (P.try $ p_Cell p_ElemSingle)
227 , P.try $ LexemePairOpen <$> P.some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
228 , P.try $ LexemePairClose <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
229 , P.try $ LexemeEscape <$> p_Cell p_Escape
230 , P.try $ LexemeLink <$> p_Cell p_Link
231 , P.try $ LexemeAlphaNum <$> p_Cell (P.some p_AlphaNum)
232 , LexemeAny <$> p_Cell (pure <$> P.anyChar)
233 ]
234
235 p_AlphaNum :: Parser e s Char
236 p_AlphaNum = P.satisfy Char.isAlphaNum
237
238 p_Escape :: Parser e s Char
239 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
240
241 p_Link :: Parser e s Text
242 p_Link =
243 P.try (P.char '<' *> p <* P.char '>') <|>
244 p
245 where
246 p =
247 (\scheme addr -> Text.pack $ scheme <> "//" <> addr)
248 <$> P.option "" (P.try p_scheme)
249 <* P.string "//"
250 <*> p_addr
251 p_scheme =
252 (<> ":")
253 <$> P.some (P.satisfy $ \c ->
254 Char.isAlphaNum c
255 || c=='_'
256 || c=='-'
257 || c=='+')
258 <* P.char ':'
259 p_addr =
260 P.many $
261 P.satisfy $ \c ->
262 Char.isAlphaNum c
263 || c=='%'
264 || c=='/'
265 || c=='('
266 || c==')'
267 || c=='-'
268 || c=='_'
269 || c=='.'
270 || c=='#'
271 || c=='?'
272 || c=='='
273
274 p_ElemSingle :: Parser e s Pair
275 p_ElemSingle = pdbg "ElemSingle" $
276 PairElem
277 <$ P.char '<'
278 <*> p_Word
279 <*> p_Attrs
280 <* P.string "/>"
281
282 p_ElemOpen :: Parser e s Pair
283 p_ElemOpen = pdbg "ElemOpen" $
284 PairElem
285 <$ P.char '<'
286 <*> p_Word
287 <*> p_Attrs
288 <* P.char '>'
289
290 p_ElemClose :: Parser e s Pair
291 p_ElemClose = pdbg "ElemClose" $
292 (`PairElem` [])
293 <$ P.string "</"
294 <*> p_Word
295 <* P.char '>'
296
297 {-
298 p_ElemOpenOrSingle :: Parser e s Pair
299 p_ElemOpenOrSingle =
300 p_ElemOpen >>= \p ->
301 P.char '>' $> LexemePairOpen p <|>
302 P.string "/>" $> LexemePairAny p
303 -}