1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Either (Either(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), ($>), (<$))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (ViewL(..), (<|))
21 import Data.Text (Text)
22 import Data.Text.Buildable (Buildable(..))
23 import Data.Tuple (fst,snd)
24 import Text.Show (Show(..))
25 import qualified Data.Char as Char
26 import qualified Data.Sequence as Seq
27 import qualified Data.Set as Set
28 import qualified Data.Text as Text
29 import qualified Data.Text.Lazy as TL
30 import qualified Data.Text.Lazy.Builder as Builder
31 import qualified Text.Megaparsec as P
32 import qualified Text.Megaparsec.Prim as P
34 import Language.TCT.Token
35 import Language.TCT.Elem -- hiding (dbg)
36 import Language.TCT.Read.Elem -- hiding (pdbg)
39 import Debug.Trace (trace)
40 dbg m x = trace (m <> ": " <> show x) x
44 textOf :: Buildable a => a -> Text
45 textOf = TL.toStrict . Builder.toLazyText . build
48 type Pairs = (Tokens,[(Pair,Tokens)])
50 appendToken :: Pairs -> Token -> Pairs
51 appendToken ps = appendTokens ps . Tokens . Seq.singleton
53 appendTokens :: Pairs -> Tokens -> Pairs
54 appendTokens (t,[]) toks = (t<>toks,[])
55 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
57 openPair :: Pairs -> Pair -> Pairs
58 openPair (t,ms) p = (t,(p,mempty):ms)
60 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
61 closePair :: Pairs -> Pair -> Pairs
62 closePair (t,[]) p = dbg "closePair" $
63 (t<>tokens1 (TokenPlain $ snd $ pairBorders p tokensPlainEmpty),[])
64 closePair (t,(p1,t1):ts) p = dbg "closePair" $
66 (PairElem x ax, PairElem y ay) | x == y ->
67 appendToken (t,ts) $ TokenPair (PairElem x (ax<>ay)) t1
68 (x,y) | x == y -> appendToken (t,ts) $ TokenPair p1 t1
73 (closeUnpaired mempty (p1,t1))
75 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
76 closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens
77 closeUnpaired acc (p,toks) = dbg "closeUnpaired" $
79 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
80 PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ toks <> acc ->
81 case Text.findIndex (not . isTagChar) t of
82 Just 0 -> toksHash <> toks <> acc
83 Just i -> Tokens $ TokenTag tag <| TokenPlain t' <| ts
84 where (tag,t') = Text.splitAt i t
85 Nothing | Text.null t -> toksHash <> toks <> acc
86 Nothing -> Tokens $ TokenTag t <| ts
87 where toksHash = tokens1 $ TokenPlain $ fst $ pairBorders p mempty
88 _ -> tokens1 (TokenPlain $ fst $ pairBorders p tokensPlainEmpty) <> toks <> acc
93 case Char.generalCategory c of
94 Char.DashPunctuation -> True
95 Char.ConnectorPunctuation -> True
98 -- | Close remaining 'Pair's at end of parsing.
99 closePairs :: Pairs -> Tokens
100 closePairs (t0,ps) = dbg "closePairs" $
101 t0 <> foldl' closeUnpaired mempty ps
103 appendLexeme :: Lexeme -> Pairs -> Pairs
104 appendLexeme lex acc =
106 case dbg "appendLexeme" lex of
107 LexemePairOpen ps -> foldl' open acc ps
109 open a p@PairElem{} = openPair a p `appendToken` TokenPlain ""
110 open a p = openPair a p
111 LexemePairClose ps -> foldl' closePair acc ps
112 LexemePairAny ps -> appendTokens acc $ tokens $ TokenPlain . fst . (`pairBorders` mempty) <$> ps
113 LexemePairBoth ps -> appendTokens acc $ tokens $ (`TokenPair`mempty) <$> ps
114 LexemeEscape c -> appendToken acc $ TokenEscape c
115 LexemeLink t -> appendToken acc $ TokenLink t
116 LexemeWhite cs -> appendToken acc $ TokenPlain cs
117 LexemeAlphaNum cs -> appendToken acc $ TokenPlain $ Text.pack cs
118 LexemeChar c -> appendToken acc $ TokenPlain $ Text.singleton c
119 LexemeToken ts -> appendTokens acc ts
123 = LexemePairOpen ![Pair]
124 | LexemePairClose ![Pair]
125 | LexemePairAny ![Pair]
126 | LexemePairBoth ![Pair]
130 | LexemeAlphaNum ![Char]
132 | LexemeToken !Tokens
135 p_satisfyMaybe :: (P.MonadParsec e s m, P.Token s ~ Char) => (Char -> Maybe a) -> m a
136 p_satisfyMaybe f = P.token testChar Nothing
141 Nothing -> Left (Set.singleton $ P.Tokens $ c:|[], Set.empty, Set.empty)
143 p_Tokens :: Parser e s Tokens
144 p_Tokens = pdbg "Tokens" $
146 foldr appendLexeme mempty .
149 (LexemeWhite "" :) <$>
152 go :: [Lexeme] -> Parser e s [Lexeme]
155 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
157 mangleLexemes = \case
158 w@LexemeWhite{} :p@LexemePairAny{}:acc -> w:any2close p:acc
159 p@LexemePairAny{}:w@LexemeWhite{} :acc -> any2open p:w:acc
161 l@LexemeAlphaNum{}:c@LexemeChar{} :p@LexemePairAny{}:acc -> l:c:any2close p:acc
162 l@LexemeAlphaNum{}:p@LexemePairAny{}:c@LexemeChar{}:acc -> l:any2open p:c:acc
166 any2close,any2open :: Lexeme -> Lexeme
167 any2close (LexemePairAny ps) = LexemePairClose ps
169 any2open (LexemePairAny ps) = LexemePairOpen ps
172 pairAny :: Char -> Maybe Pair
175 '/' -> Just PairSlash
176 '"' -> Just PairDoublequote
177 '\'' -> Just PairSinglequote
178 '`' -> Just PairBackquote
179 '_' -> Just PairUnderscore
184 pairOpen :: Char -> Maybe Pair
186 '(' -> Just PairParen
187 '[' -> Just PairBracket
188 '{' -> Just PairBrace
189 '«' -> Just PairFrenchquote
192 pairClose :: Char -> Maybe Pair
194 ')' -> Just PairParen
195 ']' -> Just PairBracket
196 '}' -> Just PairBrace
197 '»' -> Just PairFrenchquote
200 p_Lexeme :: Parser e s Lexeme
201 p_Lexeme = pdbg "Lexeme" $
203 [ P.try $ LexemeWhite <$> p_Spaces
204 , P.try $ LexemePairAny <$> P.some (p_satisfyMaybe pairAny)
205 , P.try $ LexemePairBoth <$> P.some (P.try p_ElemSingle)
206 , P.try $ LexemePairOpen <$> P.some (p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
207 , P.try $ LexemePairClose <$> P.some (p_satisfyMaybe pairClose <|> P.try p_ElemClose)
208 , P.try $ LexemeEscape <$> p_Escape
209 , P.try $ LexemeLink <$> p_Link
210 , P.try $ LexemeAlphaNum <$> P.some p_AlphaNum
211 , LexemeChar <$> P.anyChar
214 p_AlphaNum :: Parser e s Char
215 p_AlphaNum = P.satisfy Char.isAlphaNum
217 p_Escape :: Parser e s Char
218 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
220 p_Link :: Parser e s Text
222 (\scheme ss addr -> Text.pack $ scheme <> ss <> addr)
223 <$> P.option "" (P.try p_scheme)
229 <$> P.some (P.satisfy $ \c ->
250 p_ElemSingle :: Parser e s Pair
251 p_ElemSingle = pdbg "ElemSingle" $
258 p_ElemOpen :: Parser e s Pair
259 p_ElemOpen = pdbg "ElemOpen" $
266 p_ElemClose :: Parser e s Pair
267 p_ElemClose = pdbg "ElemClose" $
274 p_ElemOpenOrSingle :: Parser e s Pair
277 P.char '>' $> LexemePairOpen p <|>
278 P.string "/>" $> LexemePairAny p