1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 {-# LANGUAGE ViewPatterns #-}
6 module Language.TCT.Read.Token where
8 import Control.Applicative (Applicative(..), Alternative(..))
9 import Control.Monad (Monad(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), ($>), (<$))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.Sequence (ViewL(..), (<|))
20 import Data.Text (Text)
21 -- import Data.Text.Buildable (Buildable(..))
22 import Data.TreeSeq.Strict (Tree(..))
23 import Data.Tuple (fst,snd)
24 import Prelude (Num(..))
25 import Text.Show (Show(..))
26 import qualified Data.Char as Char
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text as Text
29 -- import qualified Data.Text.Lazy as TL
30 -- import qualified Data.Text.Lazy.Builder as Builder
31 import qualified Text.Megaparsec as P
32 import qualified Text.Megaparsec.Char as P
34 import Language.TCT.Token
35 import Language.TCT.Cell
36 import Language.TCT.Elem
37 import Language.TCT.Read.Elem
38 import Language.TCT.Read.Cell
41 textOf :: Buildable a => a -> Text
42 textOf = TL.toStrict . Builder.toLazyText . build
46 type Pairs = (Tokens,[(Cell Pair,Tokens)])
48 appendToken :: Pairs -> Token -> Pairs
49 appendToken ps = appendTokens ps . Seq.singleton
51 appendTokens :: Pairs -> Tokens -> Pairs
52 appendTokens (t,[]) toks = (t<>toks,[])
53 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
55 openPair :: Pairs -> Cell Pair -> Pairs
56 openPair (t,ms) p = (t,(p,mempty):ms)
58 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
59 closePair :: Pairs -> Cell Pair -> Pairs
60 closePair ps@(_,[]) (Cell bp ep p) = dbg "closePair" $
63 TokenPlain $ snd $ pairBorders p tokensPlainEmpty
64 closePair (t,(p1,t1):ts) p = dbg "closePair" $
66 (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y ->
68 TreeN (Cell bx ey $ PairElem x (ax<>ay)) t1
69 (Cell bx _ex x, Cell _by ey y) | x == y ->
71 TreeN (Cell bx ey x) t1
76 (closeUnpaired mempty (p1,t1))
78 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
79 closeUnpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens
80 closeUnpaired acc (Cell bp ep p,toks) = dbg "closeUnpaired" $
82 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
83 PairHash | (Tree0 (Cell bt et (TokenPlain t))) :< ts <- Seq.viewl $ toks <> acc ->
84 case Text.findIndex (not . isTagChar) t of
85 -- Just 0 -> toksHash mempty <> toks <> acc
87 Tree0 (Cell bp bt{columnPos = columnPos bt + i} (TokenTag tag))
88 <| Tree0 (Cell bt{columnPos = columnPos bt + i + 1} et (TokenPlain t'))
90 where (tag,t') = Text.splitAt i t
91 Nothing | Text.null t -> toksHash mempty <> toks <> acc
92 Nothing -> Tree0 (Cell bp et (TokenTag t)) <| ts
93 _ -> toksHash tokensPlainEmpty <> toks <> acc
95 toksHash = tokens1 . Tree0 . Cell bp ep . TokenPlain . fst . pairBorders p
99 case Char.generalCategory c of
100 Char.DashPunctuation -> True
101 Char.ConnectorPunctuation -> True
104 -- | Close remaining 'Pair's at end of parsing.
105 closePairs :: Pairs -> Tokens
106 closePairs (t0,ps) = dbg "closePairs" $
107 t0 <> foldl' closeUnpaired mempty ps
109 appendLexeme :: Lexeme -> Pairs -> Pairs
110 appendLexeme lex acc =
113 LexemePairOpen ps -> foldl' open acc ps
115 open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Tree0 $ Cell ep ep $ TokenPlain "")
116 open a p = openPair a p
117 LexemePairClose ps -> foldl' closePair acc ps
118 LexemePairAny ps -> appendTokens acc $ tokens $ Tree0 . ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps
119 LexemePairBoth ps -> appendTokens acc $ tokens $ (`TreeN`mempty) <$> ps
120 LexemeEscape c -> appendToken acc $ Tree0 $ TokenEscape <$> c
121 LexemeLink t -> appendToken acc $ Tree0 $ TokenLink <$> t
122 LexemeWhite (unCell -> "") -> acc
123 LexemeWhite cs -> appendToken acc $ Tree0 $ TokenPlain <$> cs
124 LexemeAlphaNum cs -> appendToken acc $ Tree0 $ TokenPlain . Text.pack <$> cs
125 LexemeAny cs -> appendToken acc $ Tree0 $ TokenPlain . Text.pack <$> cs
126 LexemeToken ts -> appendTokens acc ts
130 = LexemePairOpen ![Cell Pair]
131 | LexemePairClose ![Cell Pair]
132 | LexemePairAny ![Cell Pair]
133 | LexemePairBoth ![Cell Pair]
134 | LexemeEscape !(Cell Char)
135 | LexemeLink !(Cell Text)
136 | LexemeWhite !(Cell White)
137 | LexemeAlphaNum !(Cell [Char])
138 | LexemeAny !(Cell [Char])
139 | LexemeToken !Tokens
142 p_Tokens :: Parser e s Tokens
143 p_Tokens = pdbg "Tokens" $
145 foldr appendLexeme mempty .
148 (LexemeWhite (cell0 "") :) <$>
149 go [LexemeWhite (cell0 "")]
151 go :: [Lexeme] -> Parser e s [Lexeme]
154 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
156 mangleLexemes = \case
157 LexemeAny (Cell _bx ex x):LexemeAny (Cell by _ey y):acc -> LexemeAny (Cell by ex (x<>y)):acc
160 w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
162 LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
165 LexemePairAny p:a@LexemeAny{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
167 w@LexemeWhite{}:a@LexemeAny{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
170 an@LexemeAlphaNum{}:a@LexemeAny{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
172 an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeAny{}:acc -> an:LexemePairOpen p:a:acc
175 c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
177 LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
180 o@LexemePairOpen{}:LexemePairAny p:acc -> o:LexemePairClose p:acc
182 LexemePairAny p:c@LexemePairClose{}:acc -> c:LexemePairClose p:acc
186 pairAny :: Char -> Maybe Pair
189 '/' -> Just PairSlash
190 '"' -> Just PairDoublequote
191 '\'' -> Just PairSinglequote
192 '`' -> Just PairBackquote
193 '_' -> Just PairUnderscore
198 pairOpen :: Char -> Maybe Pair
200 '(' -> Just PairParen
201 '[' -> Just PairBracket
202 '{' -> Just PairBrace
203 '«' -> Just PairFrenchquote
206 pairClose :: Char -> Maybe Pair
208 ')' -> Just PairParen
209 ']' -> Just PairBracket
210 '}' -> Just PairBrace
211 '»' -> Just PairFrenchquote
214 p_Cell :: Parser e s a -> Parser e s (Cell a)
219 return $ Cell bp ep a
221 p_Lexeme :: Parser e s Lexeme
222 p_Lexeme = pdbg "Lexeme" $
224 [ P.try $ LexemeWhite <$> p_Cell p_Spaces
225 , P.try $ LexemePairAny <$> P.some (p_Cell $ p_satisfyMaybe pairAny)
226 , P.try $ LexemePairBoth <$> P.some (P.try $ p_Cell p_ElemSingle)
227 , P.try $ LexemePairOpen <$> P.some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
228 , P.try $ LexemePairClose <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
229 , P.try $ LexemeEscape <$> p_Cell p_Escape
230 , P.try $ LexemeLink <$> p_Cell p_Link
231 , P.try $ LexemeAlphaNum <$> p_Cell (P.some p_AlphaNum)
232 , LexemeAny <$> p_Cell (pure <$> P.anyChar)
235 p_AlphaNum :: Parser e s Char
236 p_AlphaNum = P.satisfy Char.isAlphaNum
238 p_Escape :: Parser e s Char
239 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
241 p_Link :: Parser e s Text
243 P.try (P.char '<' *> p <* P.char '>') <|>
247 (\scheme addr -> Text.pack $ scheme <> "//" <> addr)
248 <$> P.option "" (P.try p_scheme)
253 <$> P.some (P.satisfy $ \c ->
274 p_ElemSingle :: Parser e s Pair
275 p_ElemSingle = pdbg "ElemSingle" $
282 p_ElemOpen :: Parser e s Pair
283 p_ElemOpen = pdbg "ElemOpen" $
290 p_ElemClose :: Parser e s Pair
291 p_ElemClose = pdbg "ElemClose" $
298 p_ElemOpenOrSingle :: Parser e s Pair
301 P.char '>' $> LexemePairOpen p <|>
302 P.string "/>" $> LexemePairAny p