1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.))
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..))
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Prelude (Num(..))
23 import Text.Show (Show(..))
24 import qualified Data.Char as Char
25 import qualified Data.Sequence as Seq
26 import qualified Data.Text as Text
27 import qualified Data.Text.Lazy as TL
28 import qualified Data.Text.Lazy.Builder as Builder
29 import qualified Text.Megaparsec as P
30 import qualified Text.Megaparsec.Char as P
32 import Language.TCT.Token
33 import Language.TCT.Cell
34 import Language.TCT.Elem
35 import Language.TCT.Read.Elem
36 import Language.TCT.Read.Cell
38 textOf :: Buildable a => a -> Text
39 textOf = TL.toStrict . Builder.toLazyText . build
42 type Pairs = (Tokens,[(Cell Pair,Tokens)])
44 appendToken :: Pairs -> Cell Token -> Pairs
45 appendToken ps = appendTokens ps . Seq.singleton
47 appendTokens :: Pairs -> Tokens -> Pairs
48 appendTokens (t,[]) toks = (t<>toks,[])
49 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
51 openPair :: Pairs -> Cell Pair -> Pairs
52 openPair (t,ms) p = (t,(p,mempty):ms)
54 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
55 closePair :: Pairs -> Cell Pair -> Pairs
56 closePair ps@(_,[]) (Cell bp ep p) = dbg "closePair" $
59 TokenPlain $ snd $ pairBorders p tokensPlainEmpty
60 closePair (t,(p1,t1):ts) p = dbg "closePair" $
62 (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y ->
65 TokenPair (PairElem x (ax<>ay)) t1
66 (Cell bx _ex x, Cell _by ey y) | x == y ->
74 (closeUnpaired mempty (p1,t1))
76 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
77 closeUnpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens
78 closeUnpaired acc (Cell bp ep p,toks) = dbg "closeUnpaired" $
80 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
81 PairHash | (Cell bt et (TokenPlain t)) :< ts <- Seq.viewl $ toks <> acc ->
82 case Text.findIndex (not . isTagChar) t of
83 -- Just 0 -> toksHash mempty <> toks <> acc
85 Cell bp bt{columnPos = columnPos bt + i} (TokenTag tag)
86 <| Cell bt{columnPos = columnPos bt + i + 1} et (TokenPlain t')
88 where (tag,t') = Text.splitAt i t
89 Nothing | Text.null t -> toksHash mempty <> toks <> acc
90 Nothing -> Cell bp et (TokenTag t) <| ts
91 _ -> toksHash tokensPlainEmpty <> toks <> acc
93 toksHash = tokens1 . Cell bp ep . TokenPlain . fst . pairBorders p
97 case Char.generalCategory c of
98 Char.DashPunctuation -> True
99 Char.ConnectorPunctuation -> True
102 -- | Close remaining 'Pair's at end of parsing.
103 closePairs :: Pairs -> Tokens
104 closePairs (t0,ps) = dbg "closePairs" $
105 t0 <> foldl' closeUnpaired mempty ps
107 appendLexeme :: Lexeme -> Pairs -> Pairs
108 appendLexeme lex acc =
110 case dbg "appendLexeme" lex of
111 LexemePairOpen ps -> foldl' open acc ps
113 open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Cell ep ep $ TokenPlain "")
114 open a p = openPair a p
115 LexemePairClose ps -> foldl' closePair acc ps
116 LexemePairAny ps -> appendTokens acc $ tokens $ ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps
117 LexemePairBoth ps -> appendTokens acc $ tokens $ ((`TokenPair`mempty) <$>) <$> ps
118 LexemeEscape c -> appendToken acc $ TokenEscape <$> c
119 LexemeLink t -> appendToken acc $ TokenLink <$> t
120 LexemeWhite (unCell -> "") -> acc
121 LexemeWhite cs -> appendToken acc $ TokenPlain <$> cs
122 LexemeAlphaNum cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
123 LexemeAny cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
124 LexemeToken ts -> appendTokens acc ts
128 = LexemePairOpen ![Cell Pair]
129 | LexemePairClose ![Cell Pair]
130 | LexemePairAny ![Cell Pair]
131 | LexemePairBoth ![Cell Pair]
132 | LexemeEscape !(Cell Char)
133 | LexemeLink !(Cell Text)
134 | LexemeWhite !(Cell White)
135 | LexemeAlphaNum !(Cell [Char])
136 | LexemeAny !(Cell [Char])
137 | LexemeToken !Tokens
140 p_Tokens :: Parser e s Tokens
141 p_Tokens = pdbg "Tokens" $
143 foldr appendLexeme mempty .
146 (LexemeWhite (cell0 "") :) <$>
147 go [LexemeWhite (cell0 "")]
149 go :: [Lexeme] -> Parser e s [Lexeme]
152 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
154 mangleLexemes = \case
155 LexemeAny (Cell _bx ex x):LexemeAny (Cell by _ey y):acc -> LexemeAny (Cell by ex (x<>y)):acc
158 w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
160 LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
163 LexemePairAny p:a@LexemeAny{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
165 w@LexemeWhite{}:a@LexemeAny{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
168 an@LexemeAlphaNum{}:a@LexemeAny{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
170 an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeAny{}:acc -> an:LexemePairOpen p:a:acc
173 c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
175 LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
179 pairAny :: Char -> Maybe Pair
182 '/' -> Just PairSlash
183 '"' -> Just PairDoublequote
184 '\'' -> Just PairSinglequote
185 '`' -> Just PairBackquote
186 '_' -> Just PairUnderscore
191 pairOpen :: Char -> Maybe Pair
193 '(' -> Just PairParen
194 '[' -> Just PairBracket
195 '{' -> Just PairBrace
196 '«' -> Just PairFrenchquote
199 pairClose :: Char -> Maybe Pair
201 ')' -> Just PairParen
202 ']' -> Just PairBracket
203 '}' -> Just PairBrace
204 '»' -> Just PairFrenchquote
207 p_Cell :: Parser e s a -> Parser e s (Cell a)
212 return $ Cell bp ep a
214 p_Lexeme :: Parser e s Lexeme
215 p_Lexeme = pdbg "Lexeme" $
217 [ P.try $ LexemeWhite <$> p_Cell p_Spaces
218 , P.try $ LexemePairAny <$> P.some (p_Cell $ p_satisfyMaybe pairAny)
219 , P.try $ LexemePairBoth <$> P.some (P.try $ p_Cell p_ElemSingle)
220 , P.try $ LexemePairOpen <$> P.some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
221 , P.try $ LexemePairClose <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
222 , P.try $ LexemeEscape <$> p_Cell p_Escape
223 , P.try $ LexemeLink <$> p_Cell p_Link
224 , P.try $ LexemeAlphaNum <$> p_Cell (P.some p_AlphaNum)
225 , LexemeAny <$> p_Cell (pure <$> P.anyChar)
228 p_AlphaNum :: Parser e s Char
229 p_AlphaNum = P.satisfy Char.isAlphaNum
231 p_Escape :: Parser e s Char
232 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
234 p_Link :: Parser e s Text
236 (\scheme addr -> Text.pack $ scheme <> "//" <> addr)
237 <$> P.option "" (P.try p_scheme)
243 <$> P.some (P.satisfy $ \c ->
264 p_ElemSingle :: Parser e s Pair
265 p_ElemSingle = pdbg "ElemSingle" $
272 p_ElemOpen :: Parser e s Pair
273 p_ElemOpen = pdbg "ElemOpen" $
280 p_ElemClose :: Parser e s Pair
281 p_ElemClose = pdbg "ElemClose" $
288 p_ElemOpenOrSingle :: Parser e s Pair
291 P.char '>' $> LexemePairOpen p <|>
292 P.string "/>" $> LexemePairAny p