1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 {-# LANGUAGE ViewPatterns #-}
6 module Language.TCT.Read.Token where
8 import Control.Applicative (Applicative(..), Alternative(..))
9 import Control.Monad (Monad(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), ($>), (<$))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.Sequence (ViewL(..), (<|))
20 import Data.Text (Text)
21 import Data.Text.Buildable (Buildable(..))
22 import Data.Tuple (fst,snd)
23 import Prelude (Num(..))
24 import Text.Show (Show(..))
25 import qualified Data.Char as Char
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text as Text
28 import qualified Data.Text.Lazy as TL
29 import qualified Data.Text.Lazy.Builder as Builder
30 import qualified Text.Megaparsec as P
31 import qualified Text.Megaparsec.Char as P
33 import Language.TCT.Token
34 import Language.TCT.Cell
35 import Language.TCT.Elem
36 import Language.TCT.Read.Elem
37 import Language.TCT.Read.Cell
39 textOf :: Buildable a => a -> Text
40 textOf = TL.toStrict . Builder.toLazyText . build
43 type Pairs = (Tokens,[(Cell Pair,Tokens)])
45 appendToken :: Pairs -> Cell Token -> Pairs
46 appendToken ps = appendTokens ps . Seq.singleton
48 appendTokens :: Pairs -> Tokens -> Pairs
49 appendTokens (t,[]) toks = (t<>toks,[])
50 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
52 openPair :: Pairs -> Cell Pair -> Pairs
53 openPair (t,ms) p = (t,(p,mempty):ms)
55 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
56 closePair :: Pairs -> Cell Pair -> Pairs
57 closePair ps@(_,[]) (Cell bp ep p) = dbg "closePair" $
60 TokenPlain $ snd $ pairBorders p tokensPlainEmpty
61 closePair (t,(p1,t1):ts) p = dbg "closePair" $
63 (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y ->
66 TokenPair (PairElem x (ax<>ay)) t1
67 (Cell bx _ex x, Cell _by ey y) | x == y ->
75 (closeUnpaired mempty (p1,t1))
77 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
78 closeUnpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens
79 closeUnpaired acc (Cell bp ep p,toks) = dbg "closeUnpaired" $
81 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
82 PairHash | (Cell bt et (TokenPlain t)) :< ts <- Seq.viewl $ toks <> acc ->
83 case Text.findIndex (not . isTagChar) t of
84 -- Just 0 -> toksHash mempty <> toks <> acc
86 Cell bp bt{columnPos = columnPos bt + i} (TokenTag tag)
87 <| Cell bt{columnPos = columnPos bt + i + 1} et (TokenPlain t')
89 where (tag,t') = Text.splitAt i t
90 Nothing | Text.null t -> toksHash mempty <> toks <> acc
91 Nothing -> Cell bp et (TokenTag t) <| ts
92 _ -> toksHash tokensPlainEmpty <> toks <> acc
94 toksHash = tokens1 . Cell bp ep . TokenPlain . fst . pairBorders p
98 case Char.generalCategory c of
99 Char.DashPunctuation -> True
100 Char.ConnectorPunctuation -> True
103 -- | Close remaining 'Pair's at end of parsing.
104 closePairs :: Pairs -> Tokens
105 closePairs (t0,ps) = dbg "closePairs" $
106 t0 <> foldl' closeUnpaired mempty ps
108 appendLexeme :: Lexeme -> Pairs -> Pairs
109 appendLexeme lex acc =
111 case dbg "appendLexeme" lex of
112 LexemePairOpen ps -> foldl' open acc ps
114 open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Cell ep ep $ TokenPlain "")
115 open a p = openPair a p
116 LexemePairClose ps -> foldl' closePair acc ps
117 LexemePairAny ps -> appendTokens acc $ tokens $ ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps
118 LexemePairBoth ps -> appendTokens acc $ tokens $ ((`TokenPair`mempty) <$>) <$> ps
119 LexemeEscape c -> appendToken acc $ TokenEscape <$> c
120 LexemeLink t -> appendToken acc $ TokenLink <$> t
121 LexemeWhite (unCell -> "") -> acc
122 LexemeWhite cs -> appendToken acc $ TokenPlain <$> cs
123 LexemeAlphaNum cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
124 LexemeAny cs -> appendToken acc $ TokenPlain . Text.pack <$> cs
125 LexemeToken ts -> appendTokens acc ts
129 = LexemePairOpen ![Cell Pair]
130 | LexemePairClose ![Cell Pair]
131 | LexemePairAny ![Cell Pair]
132 | LexemePairBoth ![Cell Pair]
133 | LexemeEscape !(Cell Char)
134 | LexemeLink !(Cell Text)
135 | LexemeWhite !(Cell White)
136 | LexemeAlphaNum !(Cell [Char])
137 | LexemeAny !(Cell [Char])
138 | LexemeToken !Tokens
141 p_Tokens :: Parser e s Tokens
142 p_Tokens = pdbg "Tokens" $
144 foldr appendLexeme mempty .
147 (LexemeWhite (cell0 "") :) <$>
148 go [LexemeWhite (cell0 "")]
150 go :: [Lexeme] -> Parser e s [Lexeme]
153 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
155 mangleLexemes = \case
156 LexemeAny (Cell _bx ex x):LexemeAny (Cell by _ey y):acc -> LexemeAny (Cell by ex (x<>y)):acc
159 w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
161 LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
164 LexemePairAny p:a@LexemeAny{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
166 w@LexemeWhite{}:a@LexemeAny{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
169 an@LexemeAlphaNum{}:a@LexemeAny{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
171 an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeAny{}:acc -> an:LexemePairOpen p:a:acc
174 c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
176 LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
180 pairAny :: Char -> Maybe Pair
183 '/' -> Just PairSlash
184 '"' -> Just PairDoublequote
185 '\'' -> Just PairSinglequote
186 '`' -> Just PairBackquote
187 '_' -> Just PairUnderscore
192 pairOpen :: Char -> Maybe Pair
194 '(' -> Just PairParen
195 '[' -> Just PairBracket
196 '{' -> Just PairBrace
197 '«' -> Just PairFrenchquote
200 pairClose :: Char -> Maybe Pair
202 ')' -> Just PairParen
203 ']' -> Just PairBracket
204 '}' -> Just PairBrace
205 '»' -> Just PairFrenchquote
208 p_Cell :: Parser e s a -> Parser e s (Cell a)
213 return $ Cell bp ep a
215 p_Lexeme :: Parser e s Lexeme
216 p_Lexeme = pdbg "Lexeme" $
218 [ P.try $ LexemeWhite <$> p_Cell p_Spaces
219 , P.try $ LexemePairAny <$> P.some (p_Cell $ p_satisfyMaybe pairAny)
220 , P.try $ LexemePairBoth <$> P.some (P.try $ p_Cell p_ElemSingle)
221 , P.try $ LexemePairOpen <$> P.some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
222 , P.try $ LexemePairClose <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
223 , P.try $ LexemeEscape <$> p_Cell p_Escape
224 , P.try $ LexemeLink <$> p_Cell p_Link
225 , P.try $ LexemeAlphaNum <$> p_Cell (P.some p_AlphaNum)
226 , LexemeAny <$> p_Cell (pure <$> P.anyChar)
229 p_AlphaNum :: Parser e s Char
230 p_AlphaNum = P.satisfy Char.isAlphaNum
232 p_Escape :: Parser e s Char
233 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
235 p_Link :: Parser e s Text
237 (\scheme addr -> Text.pack $ scheme <> "//" <> addr)
238 <$> P.option "" (P.try p_scheme)
244 <$> P.some (P.satisfy $ \c ->
265 p_ElemSingle :: Parser e s Pair
266 p_ElemSingle = pdbg "ElemSingle" $
273 p_ElemOpen :: Parser e s Pair
274 p_ElemOpen = pdbg "ElemOpen" $
281 p_ElemClose :: Parser e s Pair
282 p_ElemClose = pdbg "ElemClose" $
289 p_ElemOpenOrSingle :: Parser e s Pair
292 P.char '>' $> LexemePairOpen p <|>
293 P.string "/>" $> LexemePairAny p