]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Read/Elem.hs
DTC: add <page-ref> draft
[doclang.git] / Hdoc / TCT / Read / Elem.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TypeFamilies #-}
5 module Hdoc.TCT.Read.Elem where
6
7 import Control.Applicative (Applicative(..), Alternative(..))
8 import Control.Monad (Monad(..))
9 import Data.Bool
10 import Data.Char (Char)
11 import Data.Eq (Eq(..))
12 import Data.Function (($), (.))
13 import Data.Functor ((<$>), (<$))
14 import Data.Maybe (Maybe(..))
15 import Data.Semigroup (Semigroup(..))
16 import qualified Data.Char as Char
17 import qualified Data.Char.Properties.XMLCharProps as XC
18 import qualified Data.Text.Lazy as TL
19 import qualified Language.Symantic.XML as XML
20 import qualified Text.Megaparsec as P
21 import qualified Text.Megaparsec.Char as P
22
23 import Hdoc.TCT.Debug
24 import Hdoc.TCT.Elem
25 import Hdoc.TCT.Tree
26 import Hdoc.TCT.Read.Cell
27
28 -- * Word
29 p_Spaces :: P.Tokens s ~ TL.Text => Parser e s TL.Text
30 p_Spaces = P.takeWhileP (Just "Space") Char.isSpace
31 p_Spaces1 :: P.Tokens s ~ TL.Text => Parser e s TL.Text
32 p_Spaces1 = P.takeWhile1P (Just "Space") Char.isSpace
33 p_HSpaces :: P.Tokens s ~ TL.Text => Parser e s TL.Text
34 p_HSpaces = P.takeWhileP (Just "HSpace") (==' ')
35 p_Digits :: P.Tokens s ~ TL.Text => Parser e s TL.Text
36 p_Digits = P.takeWhileP (Just "Digit") Char.isDigit
37 p_Digits1 :: P.Tokens s ~ TL.Text => Parser e s TL.Text
38 p_Digits1 = P.takeWhile1P (Just "Digit1") Char.isDigit
39 p_AlphaNums :: P.Tokens s ~ TL.Text => Parser e s TL.Text
40 p_AlphaNums = P.takeWhileP (Just "AlphaNum") Char.isAlphaNum
41 p_AlphaNums1 :: P.Tokens s ~ TL.Text => Parser e s TL.Text
42 p_AlphaNums1 = P.takeWhile1P (Just "AlphaNum1") Char.isAlphaNum
43 {-
44 -- NOTE: could be done with TL.Text, which has a less greedy (<>).
45 p_Word :: Parser e Text Text
46 p_Word = debugParser "Word" $ P.try p_take <|> p_copy
47 where
48 p_take = do
49 P.notFollowedBy $ P.satisfy $ not . Char.isAlphaNum
50 w <- P.takeWhile1P (Just "Word") $ \c ->
51 Char.isAlphaNum c ||
52 c == '_' ||
53 c == '-'
54 guard $ Char.isAlphaNum $ Text.last w
55 return w
56 p_copy =
57 (<>)
58 <$> p_AlphaNums1
59 <*> P.option "" (P.try $
60 (<>)
61 <$> ((Text.pack <$>) $ P.some $ P.char '_' <|> P.char '-')
62 <*> p_copy)
63 -}
64
65 -- * Elem
66 p_ElemSingle :: P.Tokens s ~ TL.Text => Parser e s Pair
67 p_ElemSingle = debugParser "ElemSingle" $
68 PairElem
69 <$ P.char '<'
70 <*> p_ElemName
71 <*> p_ElemAttrs
72 <* P.string "/>"
73
74 p_ElemOpen :: P.Tokens s ~ TL.Text => Parser e s Pair
75 p_ElemOpen = debugParser "ElemOpen" $
76 PairElem
77 <$ P.char '<'
78 <*> p_ElemName
79 <*> p_ElemAttrs
80 <* P.char '>'
81
82 p_ElemName :: P.Tokens s ~ TL.Text => Parser e s ElemName
83 p_ElemName = P.label "NCName" $
84 XML.NCName
85 <$ P.notFollowedBy (P.satisfy $ not . XC.isXmlNCNameStartChar)
86 <*> P.takeWhile1P Nothing XC.isXmlNCNameChar
87 -- TODO: namespace
88
89 p_ElemClose :: P.Tokens s ~ TL.Text => Parser e s Pair
90 p_ElemClose = debugParser "ElemClose" $
91 (`PairElem` [])
92 <$ P.string "</"
93 <*> p_ElemName
94 <* P.char '>'
95
96 {-
97 p_ElemOpenOrSingle :: Parser e Text Pair
98 p_ElemOpenOrSingle =
99 p_ElemOpen >>= \p ->
100 P.char '>' $> LexemePairOpen p <|>
101 P.string "/>" $> LexemePairAny p
102 -}
103
104 -- * 'ElemAttr'
105 p_ElemAttrs :: P.Tokens s ~ TL.Text => Parser e s [(White,ElemAttr)]
106 p_ElemAttrs = P.many $ P.try $ (,) <$> p_Spaces <*> p_ElemAttr
107 p_ElemAttr :: P.Tokens s ~ TL.Text => Parser e s ElemAttr
108 p_ElemAttr = P.try p_ElemAttrEq <|> p_ElemAttrName
109
110 p_ElemAttrEq :: P.Tokens s ~ TL.Text => Parser e s ElemAttr
111 p_ElemAttrEq =
112 (\n (o,v,c) -> ElemAttr n ("="<>o) v c)
113 <$> p_ElemName
114 <* P.char '='
115 <*> p_ElemAttrValue
116 p_ElemAttrName :: P.Tokens s ~ TL.Text => Parser e s ElemAttr
117 p_ElemAttrName =
118 (\n -> ElemAttr n "" "" "")
119 <$> p_ElemName
120
121 p_ElemAttrValue :: P.Tokens s ~ TL.Text => Parser e s (TL.Text,TL.Text,TL.Text)
122 p_ElemAttrValue =
123 p_ElemAttrValueQuote '\'' <|>
124 p_ElemAttrValueQuote '"' <|>
125 p_ElemAttrValueWord
126
127 p_ElemAttrValueQuote :: Char -> P.Tokens s ~ TL.Text => Parser e s (TL.Text,TL.Text,TL.Text)
128 p_ElemAttrValueQuote q =
129 (\o v c -> (TL.singleton o, v, TL.singleton c))
130 <$> P.char q
131 <*> P.takeWhile1P (Just "ElemAttrValueQuoted") (/=q)
132 <*> P.char q
133 p_ElemAttrValueWord :: P.Tokens s ~ TL.Text => Parser e s (TL.Text,TL.Text,TL.Text)
134 p_ElemAttrValueWord = do
135 w <- P.takeWhile1P (Just "ElemAttrValueWord") $ \c ->
136 Char.isPrint c &&
137 not (Char.isSpace c) &&
138 c /= '\'' &&
139 c /= '"' &&
140 c /= '=' &&
141 c /= '/' &&
142 c /= '<' &&
143 c /= '>'
144 return ("",w,"")