1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 {-# LANGUAGE ViewPatterns #-}
6 module Language.TCT.Read.Token where
8 -- import Data.Text.Buildable (Buildable(..))
9 -- import qualified Data.Text.Lazy as TL
10 -- import qualified Data.Text.Lazy.Builder as Builder
11 import Control.Applicative (Applicative(..), Alternative(..))
12 import Control.Monad (Monad(..))
14 import Data.Char (Char)
16 import Data.Eq (Eq(..))
17 import Data.Ord (Ord(..))
18 import Data.Foldable (Foldable(..))
19 import Data.Sequence (Seq)
20 import Data.Function (($), (.))
21 import Data.Functor ((<$>), ($>), (<$))
22 import Data.Maybe (Maybe(..))
23 import Data.Monoid (Monoid(..))
24 import Data.Semigroup (Semigroup(..))
25 import Data.Sequence (ViewL(..), (<|))
26 import Data.Text (Text)
27 import Data.TreeSeq.Strict (Tree(..))
28 import Data.Tuple (fst,snd)
29 import Prelude (Num(..))
30 import Text.Show (Show(..))
31 import qualified Data.Char as Char
32 import qualified Data.Sequence as Seq
33 import qualified Data.Text as Text
34 import qualified Text.Megaparsec as P
35 import qualified Text.Megaparsec.Char as P
36 import qualified System.FilePath as FP
38 import Language.TCT.Cell
39 import Language.TCT.Elem
40 import Language.TCT.Read.Elem
41 import Language.TCT.Read.Cell
44 -- | In normal order: a list of 'Key's, maybe ended by 'Value', all read on the same line.
45 type Row = [Tree (Cell Key) (Cell Value)]
49 = KeyColon !Name !White -- ^ @name: @
50 | KeyEqual !Name !White -- ^ @name=@
51 | KeyBar !Name !White -- ^ @name|@
52 | KeyGreat !Name !White -- ^ @name>@
53 | KeyLower !Name !Attrs -- ^ @<name a=b@
54 | KeyDot !Name -- ^ @1. @
56 | KeyDashDash -- ^ @-- @
57 | KeySection !LevelSection -- ^ @# @
58 | KeyBrackets !Name -- ^ @[name]@
59 | KeyDotSlash !PathFile -- ^ @./file @
61 deriving (Eq, Ord, Show)
70 type PathFile = FP.FilePath
72 -- ** Type 'LevelSection'
73 type LevelSection = Int
76 -- | In reverse order: a list of nodes in scope
77 -- (hence to which the next line can append to).
78 type Rows = [Tree (Cell Key) (Cell Value)]
81 type Token = Tree (Cell TokenKey) (Cell TokenValue)
84 type Tokens = Seq Token
89 = PairHash -- ^ @#value#@
90 | PairElem !Elem !Attrs -- ^ @<elem n0=v0 n1=v1>value</elem>@
91 | PairStar -- ^ @*value*@
92 | PairSlash -- ^ @/value/@
93 | PairUnderscore -- ^ @_value_@
94 | PairDash -- ^ @-value-@
95 | PairBackquote -- ^ @`value`@
96 | PairSinglequote -- ^ @'value'@
97 | PairDoublequote -- ^ @"value"@
98 | PairFrenchquote -- ^ @«value»@
99 | PairParen -- ^ @(value)@
100 | PairBrace -- ^ @{value}@
101 | PairBracket -- ^ @[value]@
102 deriving (Eq,Ord,Show)
104 -- ** Type 'TokenValue'
110 | TokenTree (Tree (Cell Key) (Cell Value))
111 deriving (Eq,Ord,Show)
117 -- | Right-only Dyck language
118 type Pairs = (Tokens,[Opening])
121 type Opening = (Cell Pair,Tokens)
123 appendToken :: Pairs -> Token -> Pairs
124 appendToken ps = appendTokens ps . Seq.singleton
126 appendTokens :: Pairs -> Tokens -> Pairs
127 appendTokens (t,[]) toks = (t<>toks,[])
128 appendTokens (t,(p0,t0):ps) toks = (t,(p0,t0<>toks):ps)
130 openPair :: Pairs -> Cell Pair -> Pairs
131 openPair (t,ms) p = (t,(p,mempty):ms)
133 -- | Close a 'Pair' when there is a matching 'LexemePairClose'.
134 closePair :: Pairs -> Cell Pair -> Pairs
135 closePair ps@(_,[]) (Cell bp ep p) = dbg "closePair" $
138 TokenPlain $ snd $ pairBorders p tokensPlainEmpty
139 closePair (t,(p1,t1):ts) p = dbg "closePair" $
141 (Cell bx _ex (PairElem x ax), Cell _by ey (PairElem y ay)) | x == y ->
143 TreeN (Cell bx ey $ PairElem x (ax<>ay)) t1
144 (Cell bx _ex x, Cell _by ey y) | x == y ->
146 TreeN (Cell bx ey x) t1
151 (closeImpaired mempty (p1,t1))
153 -- | Close a 'Pair' when there is no matching 'LexemePairClose'.
154 closeImpaired :: Tokens -> (Cell Pair,Tokens) -> Tokens
155 closeImpaired acc (Cell bp ep p,toks) = dbg "closeImpaired" $
157 -- NOTE: try to close 'PairHash' as 'TokenTag' instead of 'TokenPlain'.
158 PairHash | (Tree0 (Cell bt et (TokenPlain t))) :< ts <- Seq.viewl $ toks <> acc ->
159 case Text.span isTagChar t of
160 ("",_) | Text.null t -> toksHash mempty <> toks <> acc
161 | otherwise -> Tree0 (Cell bp et (TokenTag t)) <| ts
163 let len = Text.length tag in
164 Tree0 (Cell bp bt{columnPos = columnPos bt + len} (TokenTag tag)) <|
165 Tree0 (Cell bt{columnPos = columnPos bt + len + 1} et (TokenPlain t')) <|
167 _ -> toksHash tokensPlainEmpty <> toks <> acc
169 toksHash = tokens1 . Tree0 . Cell bp ep . TokenPlain . fst . pairBorders p
173 case Char.generalCategory c of
174 Char.DashPunctuation -> True
175 Char.ConnectorPunctuation -> True
178 -- | Close remaining 'Pair's at end of parsing.
179 closePairs :: Pairs -> Tokens
180 closePairs (t0,ps) = dbg "closePairs" $
181 t0 <> foldl' closeImpaired mempty ps
183 appendLexeme :: Lexeme -> Pairs -> Pairs
184 appendLexeme lex acc =
187 LexemePairOpen ps -> foldl' open acc ps
189 open a p@(Cell _bp ep (PairElem{})) = openPair a p `appendToken` (Tree0 $ Cell ep ep $ TokenPlain "")
190 open a p = openPair a p
191 LexemePairClose ps -> foldl' closePair acc ps
192 LexemePairAny ps -> appendTokens acc $ tokens $ Tree0 . ((\p -> TokenPlain $ fst $ pairBorders p mempty) <$>) <$> ps
193 LexemePairBoth ps -> appendTokens acc $ tokens $ (`TreeN`mempty) <$> ps
194 LexemeEscape c -> appendToken acc $ Tree0 $ TokenEscape <$> c
195 LexemeLink t -> appendToken acc $ Tree0 $ TokenLink <$> t
196 {-LexemeWhite (unCell -> "") -> acc-}
197 LexemeWhite (unCell -> Text.all (==' ') -> True) -> acc
198 LexemeWhite cs -> appendToken acc $ Tree0 $ TokenPlain <$> cs
199 LexemeAlphaNum cs -> appendToken acc $ Tree0 $ TokenPlain . Text.pack <$> cs
200 LexemeAny cs -> appendToken acc $ Tree0 $ TokenPlain . Text.pack <$> cs
201 -- LexemeToken ts -> appendTokens acc ts
203 appendLexemes :: Pairs -> [Lexeme] -> Pairs
204 appendLexemes = foldr appendLexeme
208 = LexemePairOpen ![Cell Pair]
209 | LexemePairClose ![Cell Pair]
210 | LexemePairAny ![Cell Pair]
211 | LexemePairBoth ![Cell Pair]
212 | LexemeEscape !(Cell Char)
213 | LexemeLink !(Cell Text)
214 | LexemeWhite !(Cell White)
215 | LexemeAlphaNum !(Cell [Char])
216 | LexemeAny !(Cell [Char])
217 | LexemeTree !(Tree (Cell Key) Tokens)
218 deriving (Eq, Ord, Show)
221 type Lexemes = Seq Lexeme
223 parseTokens :: [Lexeme] -> Tokens
226 appendLexemes mempty $
228 orientLexemePairAny $ LexemeWhite (cell0 "") :
231 -- | Parse 'Lexeme's, returning them in reverse order to apply 'orientLexemePairAny'.
232 p_Lexemes :: Parser e s [Lexeme]
233 p_Lexemes = pdbg "Lexemes" $ go []
235 go :: [Lexeme] -> Parser e s [Lexeme]
238 (p_Lexeme >>= \next -> go $ orientLexemePairAny $ next:acc)
240 orientLexemePairAny :: [Lexeme] -> [Lexeme]
241 orientLexemePairAny = \case
242 LexemeAny (Cell _bx ex x):LexemeAny (Cell by _ey y):acc -> LexemeAny (Cell by ex (x<>y)):acc
245 w@LexemeWhite{}:LexemePairAny p:acc -> w:LexemePairClose p:acc
247 LexemePairAny p:w@LexemeWhite{}:acc -> LexemePairOpen p:w:acc
248 LexemePairAny p:[] -> LexemePairOpen p:[]
251 LexemePairAny p:a@LexemeAny{}:w@LexemeWhite{}:acc -> LexemePairOpen p:a:w:acc
252 LexemePairAny p:a@LexemeAny{}:[] -> LexemePairOpen p:a:[]
254 w@LexemeWhite{}:a@LexemeAny{}:LexemePairAny p:acc -> w:a:LexemePairClose p:acc
257 an@LexemeAlphaNum{}:a@LexemeAny{}:LexemePairAny p:acc -> an:a:LexemePairClose p:acc
259 an@LexemeAlphaNum{}:LexemePairAny p:a@LexemeAny{}:acc -> an:LexemePairOpen p:a:acc
262 c@LexemePairClose{}:LexemePairAny p:acc -> c:LexemePairClose p:acc
264 LexemePairAny p:o@LexemePairOpen{}:acc -> LexemePairOpen p:o:acc
267 o@LexemePairOpen{}:LexemePairAny p:acc -> o:LexemePairClose p:acc
269 LexemePairAny p:c@LexemePairClose{}:acc -> c:LexemePairClose p:acc
273 p_Lexeme :: Parser e s Lexeme
274 p_Lexeme = pdbg "Lexeme" $
276 [ P.try $ LexemeWhite <$> p_Cell p_Spaces
277 , P.try $ LexemePairAny <$> P.some (p_Cell $ p_satisfyMaybe pairAny)
278 , P.try $ LexemePairBoth <$> P.some (P.try $ p_Cell p_ElemSingle)
279 , P.try $ LexemePairOpen <$> P.some (p_Cell $ p_satisfyMaybe pairOpen <|> P.try p_ElemOpen)
280 , P.try $ LexemePairClose <$> P.some (p_Cell $ p_satisfyMaybe pairClose <|> P.try p_ElemClose)
281 , P.try $ LexemeEscape <$> p_Cell p_Escape
282 , P.try $ LexemeLink <$> p_Cell p_Link
283 , P.try $ LexemeAlphaNum <$> p_Cell (P.some p_AlphaNum)
284 , LexemeAny <$> p_Cell (pure <$> P.anyChar)
287 p_Cell :: Parser e s a -> Parser e s (Cell a)
292 return $ Cell bp ep a
294 pairAny :: Char -> Maybe Pair
297 '/' -> Just PairSlash
298 '"' -> Just PairDoublequote
299 '\'' -> Just PairSinglequote
300 '`' -> Just PairBackquote
301 '_' -> Just PairUnderscore
306 pairOpen :: Char -> Maybe Pair
308 '(' -> Just PairParen
309 '[' -> Just PairBracket
310 '{' -> Just PairBrace
311 '«' -> Just PairFrenchquote
314 pairClose :: Char -> Maybe Pair
316 ')' -> Just PairParen
317 ']' -> Just PairBracket
318 '}' -> Just PairBrace
319 '»' -> Just PairFrenchquote
322 p_AlphaNum :: Parser e s Char
323 p_AlphaNum = P.satisfy Char.isAlphaNum
325 p_Escape :: Parser e s Char
326 p_Escape = P.char '\\' *> P.satisfy Char.isPrint
328 p_Link :: Parser e s Text
330 P.try (P.char '<' *> p <* P.char '>') <|>
334 (\scheme addr -> Text.pack $ scheme <> "//" <> addr)
335 <$> P.option "" (P.try p_scheme)
340 <$> P.some (P.satisfy $ \c ->
361 p_ElemSingle :: Parser e s Pair
362 p_ElemSingle = pdbg "ElemSingle" $
369 p_ElemOpen :: Parser e s Pair
370 p_ElemOpen = pdbg "ElemOpen" $
377 p_ElemClose :: Parser e s Pair
378 p_ElemClose = pdbg "ElemClose" $
385 p_ElemOpenOrSingle :: Parser e s Pair
388 P.char '>' $> LexemePairOpen p <|>
389 P.string "/>" $> LexemePairAny p
400 -- | Build 'Tokens' from many 'Token's.
401 tokens :: [Token] -> Tokens
402 tokens = Seq.fromList
404 -- | Build 'Tokens' from one 'Token'.
405 tokens1 :: Token -> Tokens
406 tokens1 = Seq.singleton
408 tokensPlainEmpty :: Tokens
409 tokensPlainEmpty = tokens1 $ Tree0 $ cell1 $ TokenPlain ""
411 isTokenWhite :: Token -> Bool
412 isTokenWhite (Tree0 (unCell -> TokenPlain t)) = Text.all Char.isSpace t
413 isTokenWhite _ = False
415 unTokenElem :: Tokens -> Maybe (Cell (Elem,Attrs,Tokens))
417 case toList $ Seq.dropWhileR isTokenWhite toks of
418 [TreeN (Cell bp ep (PairElem e as)) ts] -> Just (Cell bp ep (e,as,ts))
421 isTokenElem :: Tokens -> Bool
423 case toList $ Seq.dropWhileR isTokenWhite toks of
424 [TreeN (unCell -> PairElem{}) _] -> True
427 pairBorders :: TokenKey -> Tokens -> (Text,Text)
432 then ("<"<>e<>foldMap f attrs<>"/>","")
433 else ("<"<>e<>foldMap f attrs<>">","</"<>e<>">")
434 where f (attr_white,Attr{..}) =
440 PairHash -> ("#","#")
441 PairStar -> ("*","*")
442 PairSlash -> ("/","/")
443 PairUnderscore -> ("_","_")
444 PairDash -> ("-","-")
445 PairBackquote -> ("`","`")
446 PairSinglequote -> ("'","'")
447 PairDoublequote -> ("\"","\"")
448 PairFrenchquote -> ("«","»")
449 PairParen -> ("(",")")
450 PairBrace -> ("{","}")
451 PairBracket -> ("[","]")