]> Git — Sourcephile - haskell/symantic-xml.git/blob - Language/Symantic/XML/Document.hs
XML: add ncName
[haskell/symantic-xml.git] / Language / Symantic / XML / Document.hs
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
12 , TS.Tree(..)
13 , TS.Trees
14 , TS.tree0
15 ) where
16
17 import Control.Applicative (Alternative(..))
18 import Data.Bool
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
26 import Data.Int (Int)
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
46
47 -- * Type 'XML'
48 type XML = TS.Tree (Sourced FileSource Node)
49 type XMLs = Seq XML
50
51 pattern Tree0 :: a -> TS.Tree a
52 pattern Tree0 a <- TS.Tree a (null -> True)
53 where Tree0 a = TS.Tree a Seq.empty
54
55 -- ** Type 'Node'
56 data Node
57 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
58 | NodeAttr QName -- ^ Node with a 'NodeText' child.
59 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
60 | NodeText EscapedText -- ^ Leaf.
61 | NodeComment TL.Text -- ^ Leaf.
62 | NodeCDATA TL.Text -- ^ Leaf.
63 deriving (Eq, Ord, Show)
64
65 -- ** Type 'EscapedText'
66 type EscapedText = [Escaped]
67
68 escapeText :: TL.Text -> EscapedText
69 escapeText s =
70 case TL.span (`List.notElem` ("<>&'\""::String)) s of
71 (t, r) | TL.null t -> escape r
72 | otherwise -> EscapedPlain t : escape r
73 where
74 escape t = case TL.uncons t of
75 Nothing -> []
76 Just (c, cs) -> escapeChar c : escapeText cs
77 escapeChar c =
78 case c of
79 '<' -> EscapedEntityRef entityRef_lt
80 '>' -> EscapedEntityRef entityRef_gt
81 '&' -> EscapedEntityRef entityRef_amp
82 '\'' -> EscapedEntityRef entityRef_apos
83 '"' -> EscapedEntityRef entityRef_quot
84 _ -> EscapedPlain $ TL.singleton c
85
86 unescapeText :: EscapedText -> TL.Text
87 unescapeText = foldMap $ \case
88 EscapedPlain t -> t
89 EscapedEntityRef EntityRef{..} -> entityRef_value
90 EscapedCharRef (CharRef c) -> TL.singleton c
91
92 -- *** Type 'Escaped'
93 -- | 'EscapedText' lexemes.
94 data Escaped
95 = EscapedPlain TL.Text
96 | EscapedEntityRef EntityRef
97 | EscapedCharRef CharRef
98 deriving (Eq, Ord, Show)
99
100 -- *** Type 'EntityRef'
101 data EntityRef = EntityRef
102 { entityRef_name :: NCName
103 , entityRef_value :: TL.Text
104 } deriving (Eq, Ord, Show)
105
106 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
107 entityRef_lt = EntityRef (NCName "lt") "<"
108 entityRef_gt = EntityRef (NCName "gt") ">"
109 entityRef_amp = EntityRef (NCName "amp") "&"
110 entityRef_quot = EntityRef (NCName "quot") "\""
111 entityRef_apos = EntityRef (NCName "apos") "'"
112
113 -- *** Type 'CharRef'
114 newtype CharRef = CharRef Char
115 deriving (Eq, Ord, Show)
116
117 -- ** Type 'Name'
118 newtype Name = Name { unName :: TL.Text }
119 deriving (Eq, Ord, Hashable)
120 instance Show Name where
121 showsPrec _p = showString . TL.unpack . unName
122 instance IsString Name where
123 fromString s
124 | c:cs <- s
125 , XC.isXmlNameStartChar c
126 && all XC.isXmlNameChar cs
127 = Name (TL.pack s)
128 | otherwise = error "Invalid XML Name"
129
130 -- ** Type 'Namespace'
131 newtype Namespace = Namespace { unNamespace :: TL.Text }
132 deriving (Eq, Ord, Show, Hashable)
133 instance IsString Namespace where
134 fromString s =
135 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
136 then Namespace (fromString s)
137 else error $ "Invalid XML Namespace: " <> show s
138
139 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
140 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
141 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
142 xmlns_empty = Namespace ""
143
144 -- * Type 'Namespaces'
145 data Namespaces prefix = Namespaces
146 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
147 , namespaces_default :: Namespace
148 } deriving (Show)
149 instance Default (Namespaces NCName) where
150 def = Namespaces
151 { namespaces_prefixes = HM.fromList
152 [ (xmlns_xml , "xml")
153 , (xmlns_xmlns, "xmlns")
154 ]
155 , namespaces_default = ""
156 }
157 instance Default (Namespaces (Maybe NCName)) where
158 def = Namespaces
159 { namespaces_prefixes = HM.fromList
160 [ (xmlns_xml , Just "xml")
161 , (xmlns_xmlns, Just "xmlns")
162 ]
163 , namespaces_default = ""
164 }
165 instance Semigroup (Namespaces NCName) where
166 x <> y = Namespaces
167 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
168 , namespaces_default = namespaces_default x
169 }
170 instance Semigroup (Namespaces (Maybe NCName)) where
171 x <> y = Namespaces
172 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
173 , namespaces_default = namespaces_default x
174 }
175 instance Monoid (Namespaces NCName) where
176 mempty = def
177 mappend = (<>)
178 instance Monoid (Namespaces (Maybe NCName)) where
179 mempty = def
180 mappend = (<>)
181
182 prefixifyQName :: Namespaces NCName -> QName -> PName
183 prefixifyQName Namespaces{..} QName{..} =
184 PName
185 { pNameSpace =
186 if qNameSpace == namespaces_default
187 then Nothing
188 else HM.lookup qNameSpace namespaces_prefixes
189 , pNameLocal = qNameLocal
190 }
191
192 -- ** Type 'NCName'
193 -- | Non-colonized name.
194 newtype NCName = NCName { unNCName :: TL.Text }
195 deriving (Eq, Ord, Hashable)
196 instance Show NCName where
197 showsPrec _p = showString . TL.unpack . unNCName
198 instance IsString NCName where
199 fromString s =
200 fromMaybe (error "Invalid XML NCName") $
201 ncName (TL.pack s)
202
203 ncName :: TL.Text -> Maybe NCName
204 ncName t =
205 case TL.uncons t of
206 Just (c, cs)
207 | XC.isXmlNCNameStartChar c
208 , TL.all XC.isXmlNCNameChar cs
209 -> Just (NCName t)
210 _ -> Nothing
211
212 poolNCNames :: [NCName]
213 poolNCNames =
214 [ NCName $ TL.pack ("ns"<>show i)
215 | i <- [1 :: Int ..]
216 ]
217
218 freshNCName :: HS.HashSet NCName -> NCName
219 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
220
221 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
222 freshifyNCName ns (NCName n) =
223 let ints = [1..] :: [Int] in
224 List.head
225 [ fresh
226 | suffix <- mempty : (show <$> ints)
227 , fresh <- [ NCName $ n <> TL.pack suffix]
228 , not $ fresh `HS.member` ns
229 ]
230
231 -- ** Type 'PName'
232 -- | Prefixed name.
233 data PName = PName
234 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
235 , pNameLocal :: NCName -- ^ eg. "stylesheet"
236 } deriving (Eq, Ord, Generic)
237 instance Show PName where
238 showsPrec p PName{pNameSpace=Nothing, ..} =
239 showsPrec p pNameLocal
240 showsPrec _p PName{pNameSpace=Just p, ..} =
241 showsPrec 10 p .
242 showChar ':' .
243 showsPrec 10 pNameLocal
244 instance IsString PName where
245 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
246 fromString s =
247 case List.break (== ':') s of
248 (_, "") -> PName Nothing $ fromString s
249 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
250 instance Hashable PName
251
252 pName :: NCName -> PName
253 pName = PName Nothing
254 {-# INLINE pName #-}
255
256 -- ** Type 'QName'
257 -- | Qualified name.
258 data QName = QName
259 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
260 , qNameLocal :: NCName -- ^ eg. "stylesheet"
261 } deriving (Eq, Ord, Generic)
262 instance Show QName where
263 showsPrec _p QName{..} =
264 (if TL.null $ unNamespace qNameSpace then id
265 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
266 ) . showsPrec 10 qNameLocal
267 instance IsString QName where
268 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
269 fromString full@('{':rest) =
270 case List.break (== '}') rest of
271 (_, "") -> error ("Invalid XML Clark notation: " <> show full)
272 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
273 fromString local = QName "" $ fromString local
274 instance Hashable QName
275
276 qName :: NCName -> QName
277 qName = QName (Namespace "")
278 {-# INLINE qName #-}
279
280 -- * Type 'Sourced'
281 data Sourced src a
282 = Sourced
283 { source :: src
284 , unSourced :: a
285 } deriving (Eq, Ord, Functor)
286 instance (Show src, Show a) => Show (Sourced src a) where
287 showsPrec p Sourced{..} =
288 showParen (p > 10) $
289 showsPrec 11 unSourced .
290 showString " @" . showsPrec 10 source
291 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
292 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
293 Sourced (FileRange fx bx ey :| lx) $
294 x<>fromPad (FilePos lines columns)<>y
295 where
296 lines = filePos_line by - filePos_line ex
297 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
298
299 -- ** Class 'FromPad'
300 class FromPad a where
301 fromPad :: FilePos -> a
302 instance FromPad T.Text where
303 fromPad FilePos{..} =
304 T.replicate filePos_line "\n" <>
305 T.replicate filePos_column " "
306 instance FromPad TL.Text where
307 fromPad FilePos{..} =
308 TL.replicate (fromIntegral filePos_line) "\n" <>
309 TL.replicate (fromIntegral filePos_column) " "
310
311 -- ** Class 'NoSource'
312 class NoSource src where
313 noSource :: src
314 instance NoSource FileSource where
315 noSource = noSource :| []
316 instance NoSource FileRange where
317 noSource = FileRange "" filePos1 filePos1
318 {-
319 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
320 mempty = sourced0 mempty
321 mappend = (<>)
322 -}
323
324 notSourced :: NoSource src => a -> Sourced src a
325 notSourced = Sourced noSource
326
327 -- * Type 'FileSource'
328 type FileSource = NonEmpty FileRange
329
330 -- ** Type 'FileRange'
331 data FileRange
332 = FileRange
333 { fileRange_file :: FilePath
334 , fileRange_begin :: FilePos
335 , fileRange_end :: FilePos
336 } deriving (Eq, Ord)
337 instance Default FileRange where
338 def = FileRange "" filePos1 filePos1
339 instance Show FileRange where
340 showsPrec _p FileRange{..} =
341 showString fileRange_file .
342 showChar '#' . showsPrec 10 fileRange_begin .
343 showChar '-' . showsPrec 10 fileRange_end
344
345 -- *** Type 'FilePos'
346 -- | Absolute text file position.
347 data FilePos = FilePos
348 { filePos_line :: {-# UNPACK #-} LineNum
349 , filePos_column :: {-# UNPACK #-} ColNum
350 } deriving (Eq, Ord)
351 instance Default FilePos where
352 def = filePos1
353 instance Show FilePos where
354 showsPrec _p FilePos{..} =
355 showsPrec 11 filePos_line .
356 showChar ':' .
357 showsPrec 11 filePos_column
358
359 filePos1 :: FilePos
360 filePos1 = FilePos 1 1
361
362 -- **** Type 'LineNum'
363 type LineNum = Int
364
365 -- **** Type 'ColNum'
366 type ColNum = Int