1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 {-# LANGUAGE ViewPatterns #-}
6 module Language.TCT.Read.Token where
8 import Control.Applicative (Applicative(..), Alternative(..))
9 import Control.Monad (Monad(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), ($>), (<$))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.Sequence (ViewL(..), (<|))
20 import Data.Text (Text)
21 import Data.Text.Buildable (Buildable(..))
22 import Data.Tuple (fst,snd)
23 import Prelude (Num(..))
24 import Text.Show (Show(..))
25 import qualified Data.Char as Char
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text as Text
28 import qualified Data.Text.Lazy as TL
29 import qualified Data.Text.Lazy.Builder as Builder
30 import qualified Text.Megaparsec as P
31 import qualified Text.Megaparsec.Char as P
33 import Language.TCT.Token
34 import Language.TCT.Cell
35 import Language.TCT.Elem
36 import Language.TCT.Read.Elem
37 import Language.TCT.Read.Cell
39 textOf :: Buildable a => a -> Text
40 textOf = TL.toStrict . Builder.toLazyText . build
43 type Pairs = (Tokens,[(Cell Pair,Tokens)])
45 appendToken :: Pairs -> Cell Token -> Pairs
46 appendToken ps = appendTokens ps . Seq.singleton
48 appendTokens :: Pairs -> Tokens -> Pairs
49 appendTokens (t,[]) toks = (t<>toks,[])
50 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
52 openPair :: Pairs -> Cell Pair -> Pairs
53 openPair (t,ms) p = (t,(p,mempty):ms)
55 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
56 closePair :: Pairs -> Cell Pair -> Pairs
57 closePair ps@(_,[]) (Cell bp ep p) = dbg "closePair" $
60 TokenPlain $ snd $ pairBorders p tokensPlainEmpty
61 closePair (t,(p1,t1):ts) p = dbg "closePair" $
63 (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y ->
66 TokenPair (PairElem x (ax<>ay)) t1
67 (Cell bx _ex x, Cell _by ey y) | x == y ->
75 (closeUnpaired mempty (p1,t1))
77 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
78 closeUnpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens
79 closeUnpaired acc (Cell bp ep p,toks) = dbg "closeUnpaired" $
81 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
82 PairHash | (Cell bt et (TokenPlain t)) :< ts <- Seq.viewl $ toks <> acc ->
83 case Text.findIndex (not . isTagChar) t of
84 -- Just 0 -> toksHash mempty <> toks <> acc
86 Cell bp bt{columnPos = columnPos bt + i} (TokenTag tag)
87 <| Cell bt{columnPos = columnPos bt + i + 1} et (TokenPlain t')
89 where (tag,t') = Text.splitAt i t
90 Nothing | Text.null t -> toksHash mempty <> toks <> acc
91 Nothing -> Cell bp et (TokenTag t) <| ts
92 _ -> toksHash tokensPlainEmpty <> toks <> acc
94 toksHash = tokens1 . Cell bp ep . TokenPlain . fst . pairBorders p
98 case Char.generalCategory c of
99 Char.DashPunctuation -> True
100 Char.ConnectorPunctuation -> True
103 -- | Close remaining 'Pair's at end of parsing.
104 closePairs :: Pairs -> Tokens
105 closePairs (t0,ps) = dbg "closePairs" $
106 t0 <> foldl' closeUnpaired mempty ps
108 appendLexeme :: Lexeme -> Pairs -> Pairs
109 appendLexeme lex acc =
112 LexemePairOpen ps -> foldl' open acc ps
114 open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Cell ep ep $ TokenPlain "")
115 open a p = openPair a p
116 LexemePairClose ps -> foldl' closePair acc ps
117 LexemePairAny ps -> appendTokens acc $ tokens $ ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps
118 LexemePairBoth ps -> appendTokens acc $ tokens $ ((`TokenPair`mempty) <$>) <$> ps
119 LexemeEscape c -> appendToken acc $ TokenEscape <$> c
120 LexemeLink t -> appendToken acc $ TokenLink <$> t
121 LexemeWhite (unCell -> "") -> acc
122 LexemeWhite cs -> appendToken acc $ TokenPlain <$> cs
123 LexemeAlphaNum cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
124 LexemeAny cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
125 LexemeToken ts -> appendTokens acc ts
129 = LexemePairOpen ![Cell Pair]
130 | LexemePairClose ![Cell Pair]
131 | LexemePairAny ![Cell Pair]
132 | LexemePairBoth ![Cell Pair]
133 | LexemeEscape !(Cell Char)
134 | LexemeLink !(Cell Text)
135 | LexemeWhite !(Cell White)
136 | LexemeAlphaNum !(Cell [Char])
137 | LexemeAny !(Cell [Char])
138 | LexemeToken !Tokens
141 p_Tokens :: Parser e s Tokens
142 p_Tokens = pdbg "Tokens" $
144 foldr appendLexeme mempty .
147 (LexemeWhite (cell0 "") :) <$>
148 go [LexemeWhite (cell0 "")]
150 go :: [Lexeme] -> Parser e s [Lexeme]
153 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
155 mangleLexemes = \case
156 LexemeAny (Cell _bx ex x):LexemeAny (Cell by _ey y):acc -> LexemeAny (Cell by ex (x<>y)):acc
159 w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
161 LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
164 LexemePairAny p:a@LexemeAny{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
166 w@LexemeWhite{}:a@LexemeAny{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
169 an@LexemeAlphaNum{}:a@LexemeAny{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
171 an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeAny{}:acc -> an:LexemePairOpen p:a:acc
174 c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
176 LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
179 o@LexemePairOpen{}:LexemePairAny p:acc -> o:LexemePairClose p:acc
181 LexemePairAny p:c@LexemePairClose{}:acc -> c:LexemePairClose p:acc
185 pairAny :: Char -> Maybe Pair
188 '/' -> Just PairSlash
189 '"' -> Just PairDoublequote
190 '\'' -> Just PairSinglequote
191 '`' -> Just PairBackquote
192 '_' -> Just PairUnderscore
197 pairOpen :: Char -> Maybe Pair
199 '(' -> Just PairParen
200 '[' -> Just PairBracket
201 '{' -> Just PairBrace
202 '«' -> Just PairFrenchquote
205 pairClose :: Char -> Maybe Pair
207 ')' -> Just PairParen
208 ']' -> Just PairBracket
209 '}' -> Just PairBrace
210 '»' -> Just PairFrenchquote
213 p_Cell :: Parser e s a -> Parser e s (Cell a)
218 return $ Cell bp ep a
220 p_Lexeme :: Parser e s Lexeme
221 p_Lexeme = pdbg "Lexeme" $
223 [ P.try $ LexemeWhite <$> p_Cell p_Spaces
224 , P.try $ LexemePairAny <$> P.some (p_Cell $ p_satisfyMaybe pairAny)
225 , P.try $ LexemePairBoth <$> P.some (P.try $ p_Cell p_ElemSingle)
226 , P.try $ LexemePairOpen <$> P.some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
227 , P.try $ LexemePairClose <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
228 , P.try $ LexemeEscape <$> p_Cell p_Escape
229 , P.try $ LexemeLink <$> p_Cell p_Link
230 , P.try $ LexemeAlphaNum <$> p_Cell (P.some p_AlphaNum)
231 , LexemeAny <$> p_Cell (pure <$> P.anyChar)
234 p_AlphaNum :: Parser e s Char
235 p_AlphaNum = P.satisfy Char.isAlphaNum
237 p_Escape :: Parser e s Char
238 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
240 p_Link :: Parser e s Text
242 P.try (P.char '<' *> p <* P.char '>') <|>
246 (\scheme addr -> Text.pack $ scheme <> "//" <> addr)
247 <$> P.option "" (P.try p_scheme)
252 <$> P.some (P.satisfy $ \c ->
273 p_ElemSingle :: Parser e s Pair
274 p_ElemSingle = pdbg "ElemSingle" $
281 p_ElemOpen :: Parser e s Pair
282 p_ElemOpen = pdbg "ElemOpen" $
289 p_ElemClose :: Parser e s Pair
290 p_ElemClose = pdbg "ElemClose" $
297 p_ElemOpenOrSingle :: Parser e s Pair
300 P.char '>' $> LexemePairOpen p <|>
301 P.string "/>" $> LexemePairAny p