]> Git — Sourcephile - haskell/symantic-xml.git/blob - Language/Symantic/XML/Document.hs
RNC: rename Text -> EscapedText
[haskell/symantic-xml.git] / Language / Symantic / XML / Document.hs
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
12 , TS.Tree(..)
13 , TS.Trees
14 , TS.tree0
15 ) where
16
17 import Control.Applicative (Alternative(..))
18 import Data.Bool
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
26 import Data.Int (Int)
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..))
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
46
47 -- * Type 'XML'
48 type XML = TS.Tree (Sourced FileSource Node)
49 type XMLs = Seq XML
50
51 pattern Tree0 :: a -> TS.Tree a
52 pattern Tree0 a <- TS.Tree a (null -> True)
53 where Tree0 a = TS.Tree a Seq.empty
54
55 -- ** Type 'Node'
56 data Node
57 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
58 | NodeAttr QName -- ^ Node with a 'NodeText' child.
59 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
60 | NodeText EscapedText -- ^ Leaf.
61 | NodeComment TL.Text -- ^ Leaf.
62 | NodeCDATA TL.Text -- ^ Leaf.
63 deriving (Eq, Ord, Show)
64
65 -- ** Type 'EscapedText'
66 type EscapedText = [Escaped]
67
68 escapeText :: TL.Text -> EscapedText
69 escapeText s =
70 case TL.span (`List.notElem` ("<>&'\""::String)) s of
71 (t, r) | TL.null t -> escape r
72 | otherwise -> EscapedPlain t : escape r
73 where
74 escape t = case TL.uncons t of
75 Nothing -> []
76 Just (c, cs) -> escapeChar c : escapeText cs
77 escapeChar c =
78 case c of
79 '<' -> EscapedEntityRef entityRef_lt
80 '>' -> EscapedEntityRef entityRef_gt
81 '&' -> EscapedEntityRef entityRef_amp
82 '\'' -> EscapedEntityRef entityRef_apos
83 '"' -> EscapedEntityRef entityRef_quot
84 _ -> EscapedPlain $ TL.singleton c
85
86 unescapeText :: EscapedText -> TL.Text
87 unescapeText = foldMap $ \case
88 EscapedPlain t -> t
89 EscapedEntityRef EntityRef{..} -> entityRef_value
90 EscapedCharRef (CharRef c) -> TL.singleton c
91
92 -- *** Type 'Escaped'
93 -- | 'EscapedText' lexemes.
94 data Escaped
95 = EscapedPlain TL.Text
96 | EscapedEntityRef EntityRef
97 | EscapedCharRef CharRef
98 deriving (Eq, Ord, Show)
99
100 -- *** Type 'EntityRef'
101 data EntityRef = EntityRef
102 { entityRef_name :: NCName
103 , entityRef_value :: TL.Text
104 } deriving (Eq, Ord, Show)
105
106 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
107 entityRef_lt = EntityRef (NCName "lt") "<"
108 entityRef_gt = EntityRef (NCName "gt") ">"
109 entityRef_amp = EntityRef (NCName "amp") "&"
110 entityRef_quot = EntityRef (NCName "quot") "\""
111 entityRef_apos = EntityRef (NCName "apos") "'"
112
113 -- *** Type 'CharRef'
114 newtype CharRef = CharRef Char
115 deriving (Eq, Ord, Show)
116
117 -- ** Type 'Name'
118 newtype Name = Name { unName :: TL.Text }
119 deriving (Eq, Ord, Hashable)
120 instance Show Name where
121 showsPrec _p = showString . TL.unpack . unName
122 instance IsString Name where
123 fromString s
124 | c:cs <- s
125 , XC.isXmlNameStartChar c
126 && all XC.isXmlNameChar cs
127 = Name (TL.pack s)
128 | otherwise = error "Invalid XML Name"
129
130 -- ** Type 'Namespace'
131 newtype Namespace = Namespace { unNamespace :: TL.Text }
132 deriving (Eq, Ord, Show, Hashable)
133 instance IsString Namespace where
134 fromString s =
135 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
136 then Namespace (fromString s)
137 else error $ "Invalid XML Namespace: " <> show s
138
139 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
140 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
141 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
142 xmlns_empty = Namespace ""
143
144 -- * Type 'Namespaces'
145 data Namespaces prefix = Namespaces
146 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
147 , namespaces_default :: Namespace
148 } deriving (Show)
149 instance Default (Namespaces NCName) where
150 def = Namespaces
151 { namespaces_prefixes = HM.fromList
152 [ (xmlns_xml , "xml")
153 , (xmlns_xmlns, "xmlns")
154 ]
155 , namespaces_default = ""
156 }
157 instance Default (Namespaces (Maybe NCName)) where
158 def = Namespaces
159 { namespaces_prefixes = HM.fromList
160 [ (xmlns_xml , Just "xml")
161 , (xmlns_xmlns, Just "xmlns")
162 ]
163 , namespaces_default = ""
164 }
165 instance Semigroup (Namespaces NCName) where
166 x <> y = Namespaces
167 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
168 , namespaces_default = namespaces_default x
169 }
170 instance Semigroup (Namespaces (Maybe NCName)) where
171 x <> y = Namespaces
172 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
173 , namespaces_default = namespaces_default x
174 }
175 instance Monoid (Namespaces NCName) where
176 mempty = def
177 mappend = (<>)
178 instance Monoid (Namespaces (Maybe NCName)) where
179 mempty = def
180 mappend = (<>)
181
182 prefixifyQName :: Namespaces NCName -> QName -> PName
183 prefixifyQName Namespaces{..} QName{..} =
184 PName
185 { pNameSpace =
186 if qNameSpace == namespaces_default
187 then Nothing
188 else HM.lookup qNameSpace namespaces_prefixes
189 , pNameLocal = qNameLocal
190 }
191
192 -- ** Type 'NCName'
193 -- | Non-colonized name.
194 newtype NCName = NCName { unNCName :: TL.Text }
195 deriving (Eq, Ord, Hashable)
196 instance Show NCName where
197 showsPrec _p = showString . TL.unpack . unNCName
198 instance IsString NCName where
199 fromString s
200 | c:cs <- s
201 , XC.isXmlNCNameStartChar c
202 && all XC.isXmlNCNameChar cs
203 = NCName (TL.pack s)
204 | otherwise = error "Invalid XML NCName"
205
206 poolNCNames :: [NCName]
207 poolNCNames =
208 [ NCName $ TL.pack ("ns"<>show i)
209 | i <- [1 :: Int ..]
210 ]
211
212 freshNCName :: HS.HashSet NCName -> NCName
213 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
214
215 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
216 freshifyNCName ns (NCName n) =
217 let ints = [1..] :: [Int] in
218 List.head
219 [ fresh
220 | suffix <- mempty : (show <$> ints)
221 , fresh <- [ NCName $ n <> TL.pack suffix]
222 , not $ fresh `HS.member` ns
223 ]
224
225 -- ** Type 'PName'
226 -- | Prefixed name.
227 data PName = PName
228 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
229 , pNameLocal :: NCName -- ^ eg. "stylesheet"
230 } deriving (Eq, Ord, Generic)
231 instance Show PName where
232 showsPrec p PName{pNameSpace=Nothing, ..} =
233 showsPrec p pNameLocal
234 showsPrec _p PName{pNameSpace=Just p, ..} =
235 showsPrec 10 p .
236 showChar ':' .
237 showsPrec 10 pNameLocal
238 instance IsString PName where
239 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
240 fromString s =
241 case List.break (== ':') s of
242 (_, "") -> PName Nothing $ fromString s
243 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
244 instance Hashable PName
245
246 pName :: NCName -> PName
247 pName = PName Nothing
248 {-# INLINE pName #-}
249
250 -- ** Type 'QName'
251 -- | Qualified name.
252 data QName = QName
253 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
254 , qNameLocal :: NCName -- ^ eg. "stylesheet"
255 } deriving (Eq, Ord, Generic)
256 instance Show QName where
257 showsPrec _p QName{..} =
258 (if TL.null $ unNamespace qNameSpace then id
259 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
260 ) . showsPrec 10 qNameLocal
261 instance IsString QName where
262 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
263 fromString full@('{':rest) =
264 case List.break (== '}') rest of
265 (_, "") -> error ("Invalid XML Clark notation: " <> show full)
266 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
267 fromString local = QName "" $ fromString local
268 instance Hashable QName
269
270 qName :: NCName -> QName
271 qName = QName (Namespace "")
272 {-# INLINE qName #-}
273
274 -- * Type 'Sourced'
275 data Sourced src a
276 = Sourced
277 { source :: src
278 , unSourced :: a
279 } deriving (Eq, Ord, Functor)
280 instance (Show src, Show a) => Show (Sourced src a) where
281 showsPrec p Sourced{..} =
282 showParen (p > 10) $
283 showsPrec 11 unSourced .
284 showString " @" . showsPrec 10 source
285 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
286 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
287 Sourced (FileRange fx bx ey :| lx) $
288 x<>fromPad (FilePos lines columns)<>y
289 where
290 lines = filePos_line by - filePos_line ex
291 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
292
293 -- ** Class 'FromPad'
294 class FromPad a where
295 fromPad :: FilePos -> a
296 instance FromPad T.Text where
297 fromPad FilePos{..} =
298 T.replicate filePos_line "\n" <>
299 T.replicate filePos_column " "
300 instance FromPad TL.Text where
301 fromPad FilePos{..} =
302 TL.replicate (fromIntegral filePos_line) "\n" <>
303 TL.replicate (fromIntegral filePos_column) " "
304
305 -- ** Class 'NoSource'
306 class NoSource src where
307 noSource :: src
308 instance NoSource FileSource where
309 noSource = noSource :| []
310 instance NoSource FileRange where
311 noSource = FileRange "" filePos1 filePos1
312 {-
313 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
314 mempty = sourced0 mempty
315 mappend = (<>)
316 -}
317
318 notSourced :: NoSource src => a -> Sourced src a
319 notSourced = Sourced noSource
320
321 -- * Type 'FileSource'
322 type FileSource = NonEmpty FileRange
323
324 -- ** Type 'FileRange'
325 data FileRange
326 = FileRange
327 { fileRange_file :: FilePath
328 , fileRange_begin :: FilePos
329 , fileRange_end :: FilePos
330 } deriving (Eq, Ord)
331 instance Default FileRange where
332 def = FileRange "" filePos1 filePos1
333 instance Show FileRange where
334 showsPrec _p FileRange{..} =
335 showString fileRange_file .
336 showChar '#' . showsPrec 10 fileRange_begin .
337 showChar '-' . showsPrec 10 fileRange_end
338
339 -- *** Type 'FilePos'
340 -- | Absolute text file position.
341 data FilePos = FilePos
342 { filePos_line :: {-# UNPACK #-} LineNum
343 , filePos_column :: {-# UNPACK #-} ColNum
344 } deriving (Eq, Ord)
345 instance Default FilePos where
346 def = filePos1
347 instance Show FilePos where
348 showsPrec _p FilePos{..} =
349 showsPrec 11 filePos_line .
350 showChar ':' .
351 showsPrec 11 filePos_column
352
353 filePos1 :: FilePos
354 filePos1 = FilePos 1 1
355
356 -- **** Type 'LineNum'
357 type LineNum = Int
358
359 -- **** Type 'ColNum'
360 type ColNum = Int