1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Token where
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
10 import Data.Char (Char)
11 import Data.Either (Either(..))
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>), ($>), (<$))
16 import Data.List.NonEmpty (NonEmpty(..))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence (ViewL(..), (<|))
21 import Data.Text (Text)
22 import Data.Text.Buildable (Buildable(..))
23 import Data.Tuple (fst,snd)
24 import Text.Show (Show(..))
25 import qualified Data.Char as Char
26 import qualified Data.Sequence as Seq
27 import qualified Data.Set as Set
28 import qualified Data.Text as Text
29 import qualified Data.Text.Lazy as TL
30 import qualified Data.Text.Lazy.Builder as Builder
31 import qualified Text.Megaparsec as P
32 import qualified Text.Megaparsec.Prim as P
34 import Language.TCT.Token
35 import Language.TCT.Elem -- hiding (dbg)
36 import Language.TCT.Read.Elem -- hiding (pdbg)
39 import Debug.Trace (trace)
40 dbg m x = trace (m <> ": " <> show x) x
44 textOf :: Buildable a => a -> Text
45 textOf = TL.toStrict . Builder.toLazyText . build
48 type Pairs = (Tokens,[(Pair,Tokens)])
50 appendToken :: Pairs -> Token -> Pairs
51 appendToken ps = appendTokens ps . Tokens . Seq.singleton
53 appendTokens :: Pairs -> Tokens -> Pairs
54 appendTokens (t,[]) toks = (t<>toks,[])
55 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
57 openPair :: Pairs -> Pair -> Pairs
58 openPair (t,ms) p = (t,(p,mempty):ms)
60 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
61 closePair :: Pairs -> Pair -> Pairs
62 closePair (t,[]) p = dbg "closePair" $
63 (t<>tokens1 (TokenPlain $ snd $ pairBorders p tokensPlainEmpty),[])
64 closePair (t,(p1,t1):ts) p = dbg "closePair" $
66 (PairElem x ax, PairElem y ay) | x == y ->
67 appendToken (t,ts) $ TokenPair (PairElem x (ax<>ay)) t1
68 (x,y) | x == y -> appendToken (t,ts) $ TokenPair p1 t1
73 (closeUnpaired mempty (p1,t1))
75 -- | Close a 'Pair' when there is not a matching 'LexemePairClose'.
76 closeUnpaired :: Tokens -> (Pair,Tokens) -> Tokens
77 closeUnpaired acc (p,toks) = dbg "closeUnpaired" $
79 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
80 PairHash | TokenPlain t :< ts <- Seq.viewl $ unTokens $ toks <> acc ->
81 case Text.findIndex (not . isTagChar) t of
82 Just 0 -> toksHash <> toks <> acc
83 Just i -> Tokens $ TokenTag tag <| TokenPlain t' <| ts
84 where (tag,t') = Text.splitAt i t
85 Nothing | Text.null t -> toksHash <> toks <> acc
86 Nothing -> Tokens $ TokenTag t <| ts
87 where toksHash = tokens1 $ TokenPlain $ fst $ pairBorders p mempty
88 _ -> tokens1 (TokenPlain $ fst $ pairBorders p tokensPlainEmpty) <> toks <> acc
93 case Char.generalCategory c of
94 Char.DashPunctuation -> True
95 Char.ConnectorPunctuation -> True
98 -- | Close remaining 'Pair's at end of parsing.
99 closePairs :: Pairs -> Tokens
100 closePairs (t0,ps) = dbg "closePairs" $
101 t0 <> foldl' closeUnpaired mempty ps
103 appendLexeme :: Lexeme -> Pairs -> Pairs
104 appendLexeme lex acc =
106 case dbg "appendLexeme" lex of
107 LexemePairOpen ps -> foldl' open acc ps
109 open a p@PairElem{} = openPair a p `appendToken` TokenPlain ""
110 open a p = openPair a p
111 LexemePairClose ps -> foldl' closePair acc ps
112 LexemePairAny ps -> appendTokens acc $ tokens $ TokenPlain . fst . (`pairBorders` mempty) <$> ps
113 LexemePairBoth ps -> appendTokens acc $ tokens $ (`TokenPair`mempty) <$> ps
114 LexemeEscape c -> appendToken acc $ TokenEscape c
115 LexemeLink t -> appendToken acc $ TokenLink t
116 LexemeWhite "" -> acc
117 LexemeWhite cs -> appendToken acc $ TokenPlain cs
118 LexemeAlphaNum cs -> appendToken acc $ TokenPlain $ Text.pack cs
119 LexemeChar c -> appendToken acc $ TokenPlain $ Text.singleton c
120 LexemeToken ts -> appendTokens acc ts
124 = LexemePairOpen ![Pair]
125 | LexemePairClose ![Pair]
126 | LexemePairAny ![Pair]
127 | LexemePairBoth ![Pair]
131 | LexemeAlphaNum ![Char]
133 | LexemeToken !Tokens
136 p_satisfyMaybe :: (P.MonadParsec e s m, P.Token s ~ Char) => (Char -> Maybe a) -> m a
137 p_satisfyMaybe f = P.token testChar Nothing
142 Nothing -> Left (Set.singleton $ P.Tokens $ c:|[], Set.empty, Set.empty)
144 p_Tokens :: Parser e s Tokens
145 p_Tokens = pdbg "Tokens" $
147 foldr appendLexeme mempty .
150 (LexemeWhite "" :) <$>
153 go :: [Lexeme] -> Parser e s [Lexeme]
156 (p_Lexeme >>= \next -> go $ mangleLexemes $ next:acc)
158 mangleLexemes = \case
159 w@LexemeWhite{} :p@LexemePairAny{}:acc -> w:any2close p:acc
160 p@LexemePairAny{}:w@LexemeWhite{} :acc -> any2open p:w:acc
162 l@LexemeAlphaNum{}:c@LexemeChar{} :p@LexemePairAny{}:acc -> l:c:any2close p:acc
163 l@LexemeAlphaNum{}:p@LexemePairAny{}:c@LexemeChar{}:acc -> l:any2open p:c:acc
167 any2close,any2open :: Lexeme -> Lexeme
168 any2close (LexemePairAny ps) = LexemePairClose ps
170 any2open (LexemePairAny ps) = LexemePairOpen ps
173 pairAny :: Char -> Maybe Pair
176 '/' -> Just PairSlash
177 '"' -> Just PairDoublequote
178 '\'' -> Just PairSinglequote
179 '`' -> Just PairBackquote
180 '_' -> Just PairUnderscore
185 pairOpen :: Char -> Maybe Pair
187 '(' -> Just PairParen
188 '[' -> Just PairBracket
189 '{' -> Just PairBrace
190 '«' -> Just PairFrenchquote
193 pairClose :: Char -> Maybe Pair
195 ')' -> Just PairParen
196 ']' -> Just PairBracket
197 '}' -> Just PairBrace
198 '»' -> Just PairFrenchquote
201 p_Lexeme :: Parser e s Lexeme
202 p_Lexeme = pdbg "Lexeme" $
204 [ P.try $ LexemeWhite <$> p_Spaces
205 , P.try $ LexemePairAny <$> P.some (p_satisfyMaybe pairAny)
206 , P.try $ LexemePairBoth <$> P.some (P.try p_ElemSingle)
207 , P.try $ LexemePairOpen <$> P.some (p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
208 , P.try $ LexemePairClose <$> P.some (p_satisfyMaybe pairClose <|> P.try p_ElemClose)
209 , P.try $ LexemeEscape <$> p_Escape
210 , P.try $ LexemeLink <$> p_Link
211 , P.try $ LexemeAlphaNum <$> P.some p_AlphaNum
212 , LexemeChar <$> P.anyChar
215 p_AlphaNum :: Parser e s Char
216 p_AlphaNum = P.satisfy Char.isAlphaNum
218 p_Escape :: Parser e s Char
219 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
221 p_Link :: Parser e s Text
223 (\scheme ss addr -> Text.pack $ scheme <> ss <> addr)
224 <$> P.option "" (P.try p_scheme)
230 <$> P.some (P.satisfy $ \c ->
251 p_ElemSingle :: Parser e s Pair
252 p_ElemSingle = pdbg "ElemSingle" $
259 p_ElemOpen :: Parser e s Pair
260 p_ElemOpen = pdbg "ElemOpen" $
267 p_ElemClose :: Parser e s Pair
268 p_ElemClose = pdbg "ElemClose" $
275 p_ElemOpenOrSingle :: Parser e s Pair
278 P.char '>' $> LexemePairOpen p <|>
279 P.string "/>" $> LexemePairAny p