]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Elem.hs
Add NodePara and NodeGroup.
[doclang.git] / Language / TCT / Read / Elem.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Language.TCT.Read.Elem where
6
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
9 import Data.Bool
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Function (($))
13 import Data.Functor ((<$>), (<$))
14 import Data.Maybe (Maybe(..))
15 import Data.Semigroup (Semigroup(..))
16 import qualified Data.Char as Char
17 import qualified Text.Megaparsec as P
18 import qualified Text.Megaparsec.Char as P
19 import qualified Data.Text.Lazy as TL
20
21 import Language.TCT.Elem
22 import Language.TCT.Tree
23 import Language.TCT.Read.Cell
24
25 -- * Word
26 p_Spaces :: P.Tokens s ~ TL.Text => Parser e s TL.Text
27 p_Spaces = P.takeWhileP (Just "Space") Char.isSpace
28 p_Spaces1 :: P.Tokens s ~ TL.Text => Parser e s TL.Text
29 p_Spaces1 = P.takeWhile1P (Just "Space") Char.isSpace
30 p_HSpaces :: P.Tokens s ~ TL.Text => Parser e s TL.Text
31 p_HSpaces = P.takeWhileP (Just "HSpace") (==' ')
32 p_Digits :: P.Tokens s ~ TL.Text => Parser e s TL.Text
33 p_Digits = P.takeWhile1P (Just "Digit") Char.isDigit
34 p_AlphaNums :: P.Tokens s ~ TL.Text => Parser e s TL.Text
35 p_AlphaNums = P.takeWhile1P (Just "AlphaNum") Char.isAlphaNum
36 {-
37 -- NOTE: could be done with TL.Text, which has a less greedy (<>).
38 p_Word :: Parser e Text Text
39 p_Word = pdbg "Word" $ P.try p_take <|> p_copy
40 where
41 p_take = do
42 P.notFollowedBy $ P.satisfy $ not . Char.isAlphaNum
43 w <- P.takeWhile1P (Just "Word") $ \c ->
44 Char.isAlphaNum c ||
45 c == '_' ||
46 c == '-'
47 guard $ Char.isAlphaNum $ Text.last w
48 return w
49 p_copy =
50 (<>)
51 <$> p_AlphaNums
52 <*> P.option "" (P.try $
53 (<>)
54 <$> ((Text.pack <$>) $ P.some $ P.char '_' <|> P.char '-')
55 <*> p_copy)
56 -}
57
58 -- * Elem
59 p_ElemSingle :: P.Tokens s ~ TL.Text => Parser e s Pair
60 p_ElemSingle = pdbg "ElemSingle" $
61 PairElem
62 <$ P.char '<'
63 <*> p_ElemName
64 <*> p_ElemAttrs
65 <* P.string "/>"
66
67 p_ElemOpen :: P.Tokens s ~ TL.Text => Parser e s Pair
68 p_ElemOpen = pdbg "ElemOpen" $
69 PairElem
70 <$ P.char '<'
71 <*> p_ElemName
72 <*> p_ElemAttrs
73 <* P.char '>'
74
75 p_ElemName :: P.Tokens s ~ TL.Text => Parser e s ElemName
76 p_ElemName = p_AlphaNums
77 -- TODO: namespace
78
79 p_ElemClose :: P.Tokens s ~ TL.Text => Parser e s Pair
80 p_ElemClose = pdbg "ElemClose" $
81 (`PairElem` [])
82 <$ P.string "</"
83 <*> p_ElemName
84 <* P.char '>'
85
86 {-
87 p_ElemOpenOrSingle :: Parser e Text Pair
88 p_ElemOpenOrSingle =
89 p_ElemOpen >>= \p ->
90 P.char '>' $> LexemePairOpen p <|>
91 P.string "/>" $> LexemePairAny p
92 -}
93
94 -- * 'ElemAttr'
95 p_ElemAttrs :: P.Tokens s ~ TL.Text => Parser e s [(White,ElemAttr)]
96 p_ElemAttrs = P.many $ P.try $ (,) <$> p_Spaces <*> p_ElemAttr
97 p_ElemAttr :: P.Tokens s ~ TL.Text => Parser e s ElemAttr
98 p_ElemAttr = P.try p_ElemAttrEq <|> p_ElemAttrName
99
100 p_ElemAttrEq :: P.Tokens s ~ TL.Text => Parser e s ElemAttr
101 p_ElemAttrEq =
102 (\n (o,v,c) -> ElemAttr n ("="<>o) v c)
103 <$> p_ElemName
104 <* P.char '='
105 <*> p_ElemAttrValue
106 p_ElemAttrName :: P.Tokens s ~ TL.Text => Parser e s ElemAttr
107 p_ElemAttrName =
108 (\n -> ElemAttr n "" "" "")
109 <$> p_ElemName
110
111 p_ElemAttrValue :: P.Tokens s ~ TL.Text => Parser e s (TL.Text,TL.Text,TL.Text)
112 p_ElemAttrValue =
113 p_ElemAttrValueQuote '\'' <|>
114 p_ElemAttrValueQuote '"' <|>
115 p_ElemAttrValueWord
116
117 p_ElemAttrValueQuote :: Char -> P.Tokens s ~ TL.Text => Parser e s (TL.Text,TL.Text,TL.Text)
118 p_ElemAttrValueQuote q =
119 (\o v c -> (TL.singleton o, v, TL.singleton c))
120 <$> P.char q
121 <*> P.takeWhile1P (Just "ElemAttrValueQuoted") (/=q)
122 <*> P.char q
123 p_ElemAttrValueWord :: P.Tokens s ~ TL.Text => Parser e s (TL.Text,TL.Text,TL.Text)
124 p_ElemAttrValueWord = do
125 w <- P.takeWhile1P (Just "ElemAttrValueWord") $ \c ->
126 Char.isPrint c &&
127 not (Char.isSpace c) &&
128 c /= '\'' &&
129 c /= '"' &&
130 c /= '=' &&
131 c /= '/' &&
132 c /= '<' &&
133 c /= '>'
134 return ("",w,"")