]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Elem.hs
Remove NodeGroup, as it can break parsing based on Seq.spanl.
[doclang.git] / Language / TCT / Read / Elem.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Elem where
6
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
9 import Data.Bool
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Function (($))
13 import Data.Functor ((<$>), (<$))
14 import Data.Maybe (Maybe(..))
15 import Data.Semigroup (Semigroup(..))
16 import qualified Data.Char as Char
17 import qualified Text.Megaparsec as P
18 import qualified Text.Megaparsec.Char as P
19 import qualified Data.Text.Lazy as TL
20
21 import Language.TCT.Debug
22 import Language.TCT.Elem
23 import Language.TCT.Tree
24 import Language.TCT.Read.Cell
25
26 -- * Word
27 p_Spaces :: P.Tokens s ~ TL.Text => Parser e s TL.Text
28 p_Spaces = P.takeWhileP (Just "Space") Char.isSpace
29 p_Spaces1 :: P.Tokens s ~ TL.Text => Parser e s TL.Text
30 p_Spaces1 = P.takeWhile1P (Just "Space") Char.isSpace
31 p_HSpaces :: P.Tokens s ~ TL.Text => Parser e s TL.Text
32 p_HSpaces = P.takeWhileP (Just "HSpace") (==' ')
33 p_Digits :: P.Tokens s ~ TL.Text => Parser e s TL.Text
34 p_Digits = P.takeWhileP (Just "Digit") Char.isDigit
35 p_Digits1 :: P.Tokens s ~ TL.Text => Parser e s TL.Text
36 p_Digits1 = P.takeWhile1P (Just "Digit1") Char.isDigit
37 p_AlphaNums :: P.Tokens s ~ TL.Text => Parser e s TL.Text
38 p_AlphaNums = P.takeWhileP (Just "AlphaNum") Char.isAlphaNum
39 p_AlphaNums1 :: P.Tokens s ~ TL.Text => Parser e s TL.Text
40 p_AlphaNums1 = P.takeWhile1P (Just "AlphaNum1") Char.isAlphaNum
41 {-
42 -- NOTE: could be done with TL.Text, which has a less greedy (<>).
43 p_Word :: Parser e Text Text
44 p_Word = debugParser "Word" $ P.try p_take <|> p_copy
45 where
46 p_take = do
47 P.notFollowedBy $ P.satisfy $ not . Char.isAlphaNum
48 w <- P.takeWhile1P (Just "Word") $ \c ->
49 Char.isAlphaNum c ||
50 c == '_' ||
51 c == '-'
52 guard $ Char.isAlphaNum $ Text.last w
53 return w
54 p_copy =
55 (<>)
56 <$> p_AlphaNums1
57 <*> P.option "" (P.try $
58 (<>)
59 <$> ((Text.pack <$>) $ P.some $ P.char '_' <|> P.char '-')
60 <*> p_copy)
61 -}
62
63 -- * Elem
64 p_ElemSingle :: P.Tokens s ~ TL.Text => Parser e s Pair
65 p_ElemSingle = debugParser "ElemSingle" $
66 PairElem
67 <$ P.char '<'
68 <*> p_ElemName
69 <*> p_ElemAttrs
70 <* P.string "/>"
71
72 p_ElemOpen :: P.Tokens s ~ TL.Text => Parser e s Pair
73 p_ElemOpen = debugParser "ElemOpen" $
74 PairElem
75 <$ P.char '<'
76 <*> p_ElemName
77 <*> p_ElemAttrs
78 <* P.char '>'
79
80 p_ElemName :: P.Tokens s ~ TL.Text => Parser e s ElemName
81 p_ElemName = p_AlphaNums1
82 -- TODO: namespace
83
84 p_ElemClose :: P.Tokens s ~ TL.Text => Parser e s Pair
85 p_ElemClose = debugParser "ElemClose" $
86 (`PairElem` [])
87 <$ P.string "</"
88 <*> p_ElemName
89 <* P.char '>'
90
91 {-
92 p_ElemOpenOrSingle :: Parser e Text Pair
93 p_ElemOpenOrSingle =
94 p_ElemOpen >>= \p ->
95 P.char '>' $> LexemePairOpen p <|>
96 P.string "/>" $> LexemePairAny p
97 -}
98
99 -- * 'ElemAttr'
100 p_ElemAttrs :: P.Tokens s ~ TL.Text => Parser e s [(White,ElemAttr)]
101 p_ElemAttrs = P.many $ P.try $ (,) <$> p_Spaces <*> p_ElemAttr
102 p_ElemAttr :: P.Tokens s ~ TL.Text => Parser e s ElemAttr
103 p_ElemAttr = P.try p_ElemAttrEq <|> p_ElemAttrName
104
105 p_ElemAttrEq :: P.Tokens s ~ TL.Text => Parser e s ElemAttr
106 p_ElemAttrEq =
107 (\n (o,v,c) -> ElemAttr n ("="<>o) v c)
108 <$> p_ElemName
109 <* P.char '='
110 <*> p_ElemAttrValue
111 p_ElemAttrName :: P.Tokens s ~ TL.Text => Parser e s ElemAttr
112 p_ElemAttrName =
113 (\n -> ElemAttr n "" "" "")
114 <$> p_ElemName
115
116 p_ElemAttrValue :: P.Tokens s ~ TL.Text => Parser e s (TL.Text,TL.Text,TL.Text)
117 p_ElemAttrValue =
118 p_ElemAttrValueQuote '\'' <|>
119 p_ElemAttrValueQuote '"' <|>
120 p_ElemAttrValueWord
121
122 p_ElemAttrValueQuote :: Char -> P.Tokens s ~ TL.Text => Parser e s (TL.Text,TL.Text,TL.Text)
123 p_ElemAttrValueQuote q =
124 (\o v c -> (TL.singleton o, v, TL.singleton c))
125 <$> P.char q
126 <*> P.takeWhile1P (Just "ElemAttrValueQuoted") (/=q)
127 <*> P.char q
128 p_ElemAttrValueWord :: P.Tokens s ~ TL.Text => Parser e s (TL.Text,TL.Text,TL.Text)
129 p_ElemAttrValueWord = do
130 w <- P.takeWhile1P (Just "ElemAttrValueWord") $ \c ->
131 Char.isPrint c &&
132 not (Char.isSpace c) &&
133 c /= '\'' &&
134 c /= '"' &&
135 c /= '=' &&
136 c /= '/' &&
137 c /= '<' &&
138 c /= '>'
139 return ("",w,"")