]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Token.hs
Remove channel State in DTC writing.
[doclang.git] / Language / TCT / Read / Token.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
6
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
9 import Data.Bool
10 import Data.Char (Char)
11 import Data.Either (Either(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), ($>), (<$))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (ViewL(..), (<|))
21 import Data.Text (Text)
22 import Data.Text.Buildable (Buildable(..))
23 import Data.Tuple (fst,snd)
24 import Text.Show (Show(..))
25 import qualified Data.Char as Char
26 import qualified Data.Sequence as Seq
27 import qualified Data.Set as Set
28 import qualified Data.Text as Text
29 import qualified Data.Text.Lazy as TL
30 import qualified Data.Text.Lazy.Builder as Builder
31 import qualified Text.Megaparsec as P
32 import qualified Text.Megaparsec.Prim as P
33
34 import Language.TCT.Token
35 import Language.TCT.Elem -- hiding (dbg)
36 import Language.TCT.Read.Elem -- hiding (pdbg)
37
38 {-
39 import Debug.Trace (trace)
40 dbg m x = trace (m <> ": " <> show x) x
41 pdbg m p = P.dbg m p
42 -}
43
44 textOf :: Buildable a => a -> Text
45 textOf = TL.toStrict . Builder.toLazyText . build
46
47 -- * Type 'Pairs'
48 type Pairs = (Tokens,[(Pair,Tokens)])
49
50 appendToken :: Pairs -> Token -> Pairs
51 appendToken ps = appendTokens ps . Tokens . Seq.singleton
52
53 appendTokens :: Pairs -> Tokens -> Pairs
54 appendTokens (t,[]) toks = (t<>toks,[])
55 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
56
57 openPair :: Pairs -> Pair -> Pairs
58 openPair (t,ms) p = (t,(p,mempty):ms)
59
60 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
61 closePair :: Pairs -> Pair -> Pairs
62 closePair (t,[]) p = dbg "closePair" $
63 (t<>tokens1 (TokenPlain $ snd $ pairBorders p tokensPlainEmpty),[])
64 closePair (t,(p1,t1):ts) p = dbg "closePair" $
65 case (p,p1) of
66 (PairElem x ax, PairElem y ay) | x == y ->
67 appendToken (t,ts) $ TokenPair (PairElem x (ax<>ay)) t1
68 (x,y) | x == y -> appendToken (t,ts) $ TokenPair p1 t1
69 _ ->
70 (`closePair` p) $
71 appendTokens
72 (t,ts)
73 (closeUnpaired mempty (p1,t1))
74
75 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
76 closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens
77 closeUnpaired acc (p,toks) = dbg "closeUnpaired" $
78 case p of
79 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
80 PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ toks <> acc ->
81 case Text.findIndex (not . isTagChar) t of
82 Just 0 -> toksHash <> toks <> acc
83 Just i -> Tokens $ TokenTag tag <| TokenPlain t' <| ts
84 where (tag,t') = Text.splitAt i t
85 Nothing | Text.null t -> toksHash <> toks <> acc
86 Nothing -> Tokens $ TokenTag t <| ts
87 where toksHash = tokens1 $ TokenPlain $ fst $ pairBorders p mempty
88 _ -> tokens1 (TokenPlain $ fst $ pairBorders p tokensPlainEmpty) <> toks <> acc
89 where
90 isTagChar c =
91 Char.isAlphaNum c ||
92 c=='·' ||
93 case Char.generalCategory c of
94 Char.DashPunctuation -> True
95 Char.ConnectorPunctuation -> True
96 _ -> False
97
98 -- | Close remaining 'Pair's at end of parsing.
99 closePairs :: Pairs -> Tokens
100 closePairs (t0,ps) = dbg "closePairs" $
101 t0 <> foldl' closeUnpaired mempty ps
102
103 appendLexeme :: Lexeme -> Pairs -> Pairs
104 appendLexeme lex acc =
105 dbg "appendLexeme" $
106 case dbg "appendLexeme" lex of
107 LexemePairOpen ps -> foldl' open acc ps
108 where
109 open a p@PairElem{} = openPair a p `appendToken` TokenPlain ""
110 open a p = openPair a p
111 LexemePairClose ps -> foldl' closePair acc ps
112 LexemePairAny ps -> appendTokens acc $ tokens $ TokenPlain . fst . (`pairBorders` mempty) <$> ps
113 LexemePairBoth ps -> appendTokens acc $ tokens $ (`TokenPair`mempty) <$> ps
114 LexemeEscape c -> appendToken acc $ TokenEscape c
115 LexemeLink t -> appendToken acc $ TokenLink t
116 LexemeWhite "" -> acc
117 LexemeWhite cs -> appendToken acc $ TokenPlain cs
118 LexemeAlphaNum cs -> appendToken acc $ TokenPlain $ Text.pack cs
119 LexemeChar c -> appendToken acc $ TokenPlain $ Text.singleton c
120 LexemeToken ts -> appendTokens acc ts
121
122 -- * Type 'Lexeme'
123 data Lexeme
124 = LexemePairOpen ![Pair]
125 | LexemePairClose ![Pair]
126 | LexemePairAny ![Pair]
127 | LexemePairBoth ![Pair]
128 | LexemeEscape !Char
129 | LexemeLink !Text
130 | LexemeWhite !White
131 | LexemeAlphaNum ![Char]
132 | LexemeChar !Char
133 | LexemeToken !Tokens
134 deriving (Eq, Show)
135
136 p_satisfyMaybe :: (P.MonadParsec e s m, P.Token s ~ Char) => (Char -> Maybe a) -> m a
137 p_satisfyMaybe f = P.token testChar Nothing
138 where
139 testChar c =
140 case f c of
141 Just a -> Right a
142 Nothing -> Left (Set.singleton $ P.Tokens $ c:|[], Set.empty, Set.empty)
143
144 p_Tokens :: Parser e s Tokens
145 p_Tokens = pdbg "Tokens" $
146 closePairs .
147 foldr appendLexeme mempty .
148 dbg "Lexemes" .
149 mangleLexemes .
150 (LexemeWhite "" :) <$>
151 go [LexemeWhite ""]
152 where
153 go :: [Lexeme] -> Parser e s [Lexeme]
154 go acc =
155 (P.eof $> acc) <|>
156 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
157
158 mangleLexemes = \case
159 w@LexemeWhite{} :p@LexemePairAny{}:acc -> w:any2close p:acc
160 p@LexemePairAny{}:w@LexemeWhite{} :acc -> any2open p:w:acc
161
162 l@LexemeAlphaNum{}:c@LexemeChar{} :p@LexemePairAny{}:acc -> l:c:any2close p:acc
163 l@LexemeAlphaNum{}:p@LexemePairAny{}:c@LexemeChar{}:acc -> l:any2open p:c:acc
164
165 acc -> acc
166
167 any2close,any2open :: Lexeme -> Lexeme
168 any2close (LexemePairAny ps) = LexemePairClose ps
169 any2close c = c
170 any2open (LexemePairAny ps) = LexemePairOpen ps
171 any2open c = c
172
173 pairAny :: Char -> Maybe Pair
174 pairAny = \case
175 '-' -> Just PairDash
176 '/' -> Just PairSlash
177 '"' -> Just PairDoublequote
178 '\'' -> Just PairSinglequote
179 '`' -> Just PairBackquote
180 '_' -> Just PairUnderscore
181 '*' -> Just PairStar
182 '#' -> Just PairHash
183 _ -> Nothing
184
185 pairOpen :: Char -> Maybe Pair
186 pairOpen = \case
187 '(' -> Just PairParen
188 '[' -> Just PairBracket
189 '{' -> Just PairBrace
190 '«' -> Just PairFrenchquote
191 _ -> Nothing
192
193 pairClose :: Char -> Maybe Pair
194 pairClose = \case
195 ')' -> Just PairParen
196 ']' -> Just PairBracket
197 '}' -> Just PairBrace
198 '»' -> Just PairFrenchquote
199 _ -> Nothing
200
201 p_Lexeme :: Parser e s Lexeme
202 p_Lexeme = pdbg "Lexeme" $
203 P.choice
204 [ P.try $ LexemeWhite <$> p_Spaces
205 , P.try $ LexemePairAny <$> P.some (p_satisfyMaybe pairAny)
206 , P.try $ LexemePairBoth <$> P.some (P.try p_ElemSingle)
207 , P.try $ LexemePairOpen <$> P.some (p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
208 , P.try $ LexemePairClose <$> P.some (p_satisfyMaybe pairClose <|> P.try p_ElemClose)
209 , P.try $ LexemeEscape <$> p_Escape
210 , P.try $ LexemeLink <$> p_Link
211 , P.try $ LexemeAlphaNum <$> P.some p_AlphaNum
212 , LexemeChar <$> P.anyChar
213 ]
214
215 p_AlphaNum :: Parser e s Char
216 p_AlphaNum = P.satisfy Char.isAlphaNum
217
218 p_Escape :: Parser e s Char
219 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
220
221 p_Link :: Parser e s Text
222 p_Link =
223 (\scheme ss addr -> Text.pack $ scheme <> ss <> addr)
224 <$> P.option "" (P.try p_scheme)
225 <*> P.string "//"
226 <*> p_addr
227 where
228 p_scheme =
229 (<>)
230 <$> P.some (P.satisfy $ \c ->
231 Char.isAlphaNum c
232 || c=='_'
233 || c=='-'
234 || c=='+')
235 <*> P.string ":"
236 p_addr =
237 P.many $
238 P.satisfy $ \c ->
239 Char.isAlphaNum c
240 || c=='%'
241 || c=='/'
242 || c=='('
243 || c==')'
244 || c=='-'
245 || c=='_'
246 || c=='.'
247 || c=='#'
248 || c=='?'
249 || c=='='
250
251 p_ElemSingle :: Parser e s Pair
252 p_ElemSingle = pdbg "ElemSingle" $
253 PairElem
254 <$ P.char '<'
255 <*> p_Word
256 <*> p_Attrs
257 <* P.string "/>"
258
259 p_ElemOpen :: Parser e s Pair
260 p_ElemOpen = pdbg "ElemOpen" $
261 PairElem
262 <$ P.char '<'
263 <*> p_Word
264 <*> p_Attrs
265 <* P.char '>'
266
267 p_ElemClose :: Parser e s Pair
268 p_ElemClose = pdbg "ElemClose" $
269 (`PairElem` [])
270 <$ P.string "</"
271 <*> p_Word
272 <* P.char '>'
273
274 {-
275 p_ElemOpenOrSingle :: Parser e s Pair
276 p_ElemOpenOrSingle =
277 p_ElemOpen >>= \p ->
278 P.char '>' $> LexemePairOpen p <|>
279 P.string "/>" $> LexemePairAny p
280 -}