1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Foldable (Foldable(..))
13 import Data.Function (($), (.))
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.Maybe (Maybe(..))
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence (ViewL(..), (<|))
19 import Data.Text (Text)
20 import Data.Text.Buildable (Buildable(..))
21 import Data.Tuple (fst,snd)
22 import Prelude (Num(..))
23 import Text.Show (Show(..))
24 import qualified Data.Char as Char
25 import qualified Data.Sequence as Seq
26 import qualified Data.Text as Text
27 import qualified Data.Text.Lazy as TL
28 import qualified Data.Text.Lazy.Builder as Builder
29 import qualified Text.Megaparsec as P
30 import qualified Text.Megaparsec.Char as P
32 import Language.TCT.Token
33 import Language.TCT.Cell
34 import Language.TCT.Elem
35 import Language.TCT.Read.Elem
36 import Language.TCT.Read.Cell
39 import Debug.Trace (trace)
40 dbg m x = trace (m <> ": " <> show x) x
44 textOf :: Buildable a => a -> Text
45 textOf = TL.toStrict . Builder.toLazyText . build
48 type Pairs = (Tokens,[(Cell Pair,Tokens)])
50 appendToken :: Pairs -> Cell Token -> Pairs
51 appendToken ps = appendTokens ps . Seq.singleton
53 appendTokens :: Pairs -> Tokens -> Pairs
54 appendTokens (t,[]) toks = (t<>toks,[])
55 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
57 openPair :: Pairs -> Cell Pair -> Pairs
58 openPair (t,ms) p = (t,(p,mempty):ms)
60 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
61 closePair :: Pairs -> Cell Pair -> Pairs
62 closePair ps@(_,[]) (Cell pos posEnd p) = dbg "closePair" $
65 TokenPlain $ snd $ pairBorders p tokensPlainEmpty
66 closePair (t,(p1,t1):ts) p = dbg "closePair" $
68 (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y ->
71 TokenPair (PairElem x (ax<>ay)) t1
72 (Cell bx _ex x, Cell _by ey y) | x == y ->
80 (closeUnpaired mempty (p1,t1))
82 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
83 closeUnpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens
84 closeUnpaired acc (Cell bp ep p,toks) = dbg "closeUnpaired" $
86 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
87 PairHash | (Cell bt et (TokenPlain t)) :< ts <- Seq.viewl $ toks <> acc ->
88 case Text.findIndex (not . isTagChar) t of
89 -- Just 0 -> toksHash mempty <> toks <> acc
91 Cell bp bt{columnPos = columnPos bt + i} (TokenTag tag)
92 <| Cell bt{columnPos = columnPos bt + i + 1} et (TokenPlain t')
94 where (tag,t') = Text.splitAt i t
95 Nothing | Text.null t -> toksHash mempty <> toks <> acc
96 Nothing -> Cell bp et (TokenTag t) <| ts
97 _ -> toksHash tokensPlainEmpty <> toks <> acc
99 toksHash = tokens1 . Cell bp ep . TokenPlain . fst . pairBorders p
103 case Char.generalCategory c of
104 Char.DashPunctuation -> True
105 Char.ConnectorPunctuation -> True
108 -- | Close remaining 'Pair's at end of parsing.
109 closePairs :: Pairs -> Tokens
110 closePairs (t0,ps) = dbg "closePairs" $
111 t0 <> foldl' closeUnpaired mempty ps
113 appendLexeme :: Lexeme -> Pairs -> Pairs
114 appendLexeme lex acc =
116 case dbg "appendLexeme" lex of
117 LexemePairOpen ps -> foldl' open acc ps
119 open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Cell ep ep $ TokenPlain "")
120 open a p = openPair a p
121 LexemePairClose ps -> foldl' closePair acc ps
122 LexemePairAny ps -> appendTokens acc $ tokens $ ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps
123 LexemePairBoth ps -> appendTokens acc $ tokens $ ((`TokenPair`mempty) <$>) <$> ps
124 LexemeEscape c -> appendToken acc $ TokenEscape <$> c
125 LexemeLink t -> appendToken acc $ TokenLink <$> t
126 LexemeWhite (unCell -> "") -> acc
127 LexemeWhite cs -> appendToken acc $ TokenPlain <$> cs
128 LexemeAlphaNum cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
129 LexemeChar c -> appendToken acc $ TokenPlain . Text.singleton <$> c
130 LexemeToken ts -> appendTokens acc ts
134 = LexemePairOpen ![Cell Pair]
135 | LexemePairClose ![Cell Pair]
136 | LexemePairAny ![Cell Pair]
137 | LexemePairBoth ![Cell Pair]
138 | LexemeEscape !(Cell Char)
139 | LexemeLink !(Cell Text)
140 | LexemeWhite !(Cell White)
141 | LexemeAlphaNum !(Cell [Char])
142 | LexemeChar !(Cell Char)
143 | LexemeToken !Tokens
146 p_Tokens :: Parser e s Tokens
147 p_Tokens = pdbg "Tokens" $
149 foldr appendLexeme mempty .
152 (LexemeWhite (cell0 "") :) <$>
153 go [LexemeWhite (cell0 "")]
155 go :: [Lexeme] -> Parser e s [Lexeme]
158 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
160 mangleLexemes = \case
161 w@LexemeWhite{} :p@LexemePairAny{}:acc -> w:any2close p:acc
162 p@LexemePairAny{}:w@LexemeWhite{} :acc -> any2open p:w:acc
164 l@LexemeAlphaNum{}:c@LexemeChar{} :p@LexemePairAny{}:acc -> l:c:any2close p:acc
165 l@LexemeAlphaNum{}:p@LexemePairAny{}:c@LexemeChar{}:acc -> l:any2open p:c:acc
169 any2close,any2open :: Lexeme -> Lexeme
170 any2close (LexemePairAny ps) = LexemePairClose ps
172 any2open (LexemePairAny ps) = LexemePairOpen ps
175 pairAny :: Char -> Maybe Pair
178 '/' -> Just PairSlash
179 '"' -> Just PairDoublequote
180 '\'' -> Just PairSinglequote
181 '`' -> Just PairBackquote
182 '_' -> Just PairUnderscore
187 pairOpen :: Char -> Maybe Pair
189 '(' -> Just PairParen
190 '[' -> Just PairBracket
191 '{' -> Just PairBrace
192 '«' -> Just PairFrenchquote
195 pairClose :: Char -> Maybe Pair
197 ')' -> Just PairParen
198 ']' -> Just PairBracket
199 '}' -> Just PairBrace
200 '»' -> Just PairFrenchquote
203 p_Cell :: Parser e s a -> Parser e s (Cell a)
208 return $ Cell bp ep a
210 p_Lexeme :: Parser e s Lexeme
211 p_Lexeme = pdbg "Lexeme" $
213 [ P.try $ LexemeWhite <$> p_Cell p_Spaces
214 , P.try $ LexemePairAny <$> P.some (p_Cell $ p_satisfyMaybe pairAny)
215 , P.try $ LexemePairBoth <$> P.some (P.try $ p_Cell p_ElemSingle)
216 , P.try $ LexemePairOpen <$> P.some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
217 , P.try $ LexemePairClose <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
218 , P.try $ LexemeEscape <$> p_Cell p_Escape
219 , P.try $ LexemeLink <$> p_Cell p_Link
220 , P.try $ LexemeAlphaNum <$> p_Cell (P.some p_AlphaNum)
221 , LexemeChar <$> p_Cell P.anyChar
224 p_AlphaNum :: Parser e s Char
225 p_AlphaNum = P.satisfy Char.isAlphaNum
227 p_Escape :: Parser e s Char
228 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
230 p_Link :: Parser e s Text
232 (\scheme addr -> Text.pack $ scheme <> "//" <> addr)
233 <$> P.option "" (P.try p_scheme)
239 <$> P.some (P.satisfy $ \c ->
260 p_ElemSingle :: Parser e s Pair
261 p_ElemSingle = pdbg "ElemSingle" $
268 p_ElemOpen :: Parser e s Pair
269 p_ElemOpen = pdbg "ElemOpen" $
276 p_ElemClose :: Parser e s Pair
277 p_ElemClose = pdbg "ElemClose" $
284 p_ElemOpenOrSingle :: Parser e s Pair
287 P.char '>' $> LexemePairOpen p <|>
288 P.string "/>" $> LexemePairAny p