1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
17 import Control.Applicative (Alternative(..))
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
48 type XML = TS.Tree (Sourced FileSource Node)
51 pattern Tree0 :: a -> TS.Tree a
52 pattern Tree0 a <- TS.Tree a (null -> True)
53 where Tree0 a = TS.Tree a Seq.empty
57 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
58 | NodeAttr QName -- ^ Node with a 'NodeText' child.
59 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
60 | NodeText EscapedText -- ^ Leaf.
61 | NodeComment TL.Text -- ^ Leaf.
62 | NodeCDATA TL.Text -- ^ Leaf.
63 deriving (Eq, Ord, Show)
65 -- ** Type 'EscapedText'
66 type EscapedText = [Escaped]
68 escapeText :: TL.Text -> EscapedText
70 case TL.span (`List.notElem` ("<>&'\""::String)) s of
71 (t, r) | TL.null t -> escape r
72 | otherwise -> EscapedPlain t : escape r
74 escape t = case TL.uncons t of
76 Just (c, cs) -> escapeChar c : escapeText cs
78 escapeChar :: Char -> Escaped
81 '<' -> EscapedEntityRef entityRef_lt
82 '>' -> EscapedEntityRef entityRef_gt
83 '&' -> EscapedEntityRef entityRef_amp
84 '\'' -> EscapedEntityRef entityRef_apos
85 '"' -> EscapedEntityRef entityRef_quot
86 _ -> EscapedPlain $ TL.singleton c
88 unescapeText :: EscapedText -> TL.Text
89 unescapeText = foldMap $ \case
91 EscapedEntityRef EntityRef{..} -> entityRef_value
92 EscapedCharRef (CharRef c) -> TL.singleton c
95 -- | 'EscapedText' lexemes.
97 = EscapedPlain TL.Text
98 | EscapedEntityRef EntityRef
99 | EscapedCharRef CharRef
100 deriving (Eq, Ord, Show)
102 -- *** Type 'EntityRef'
103 data EntityRef = EntityRef
104 { entityRef_name :: NCName
105 , entityRef_value :: TL.Text
106 } deriving (Eq, Ord, Show)
108 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
109 entityRef_lt = EntityRef (NCName "lt") "<"
110 entityRef_gt = EntityRef (NCName "gt") ">"
111 entityRef_amp = EntityRef (NCName "amp") "&"
112 entityRef_quot = EntityRef (NCName "quot") "\""
113 entityRef_apos = EntityRef (NCName "apos") "'"
115 -- *** Type 'CharRef'
116 newtype CharRef = CharRef Char
117 deriving (Eq, Ord, Show)
120 newtype Name = Name { unName :: TL.Text }
121 deriving (Eq, Ord, Hashable)
122 instance Show Name where
123 showsPrec _p = showString . TL.unpack . unName
124 instance IsString Name where
127 , XC.isXmlNameStartChar c
128 && all XC.isXmlNameChar cs
130 | otherwise = error "Invalid XML Name"
132 -- ** Type 'Namespace'
133 newtype Namespace = Namespace { unNamespace :: TL.Text }
134 deriving (Eq, Ord, Show, Hashable)
135 instance IsString Namespace where
137 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
138 then Namespace (fromString s)
139 else error $ "Invalid XML Namespace: " <> show s
141 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
142 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
143 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
144 xmlns_empty = Namespace ""
146 -- * Type 'Namespaces'
147 data Namespaces prefix = Namespaces
148 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
149 , namespaces_default :: Namespace
151 instance Default (Namespaces NCName) where
153 { namespaces_prefixes = HM.fromList
154 [ (xmlns_xml , "xml")
155 , (xmlns_xmlns, "xmlns")
157 , namespaces_default = ""
159 instance Default (Namespaces (Maybe NCName)) where
161 { namespaces_prefixes = HM.fromList
162 [ (xmlns_xml , Just "xml")
163 , (xmlns_xmlns, Just "xmlns")
165 , namespaces_default = ""
167 instance Semigroup (Namespaces NCName) where
169 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
170 , namespaces_default = namespaces_default x
172 instance Semigroup (Namespaces (Maybe NCName)) where
174 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
175 , namespaces_default = namespaces_default x
177 instance Monoid (Namespaces NCName) where
180 instance Monoid (Namespaces (Maybe NCName)) where
184 prefixifyQName :: Namespaces NCName -> QName -> PName
185 prefixifyQName Namespaces{..} QName{..} =
188 if qNameSpace == namespaces_default
190 else HM.lookup qNameSpace namespaces_prefixes
191 , pNameLocal = qNameLocal
195 -- | Non-colonized name.
196 newtype NCName = NCName { unNCName :: TL.Text }
197 deriving (Eq, Ord, Hashable)
198 instance Show NCName where
199 showsPrec _p = showString . TL.unpack . unNCName
200 instance IsString NCName where
202 fromMaybe (error "Invalid XML NCName") $
205 ncName :: TL.Text -> Maybe NCName
209 | XC.isXmlNCNameStartChar c
210 , TL.all XC.isXmlNCNameChar cs
214 poolNCNames :: [NCName]
216 [ NCName $ TL.pack ("ns"<>show i)
220 freshNCName :: HS.HashSet NCName -> NCName
221 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
223 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
224 freshifyNCName ns (NCName n) =
225 let ints = [1..] :: [Int] in
228 | suffix <- mempty : (show <$> ints)
229 , fresh <- [ NCName $ n <> TL.pack suffix]
230 , not $ fresh `HS.member` ns
236 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
237 , pNameLocal :: NCName -- ^ eg. "stylesheet"
238 } deriving (Eq, Ord, Generic)
239 instance Show PName where
240 showsPrec p PName{pNameSpace=Nothing, ..} =
241 showsPrec p pNameLocal
242 showsPrec _p PName{pNameSpace=Just p, ..} =
245 showsPrec 10 pNameLocal
246 instance IsString PName where
247 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
249 case List.break (== ':') s of
250 (_, "") -> PName Nothing $ fromString s
251 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
252 instance Hashable PName
254 pName :: NCName -> PName
255 pName = PName Nothing
261 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
262 , qNameLocal :: NCName -- ^ eg. "stylesheet"
263 } deriving (Eq, Ord, Generic)
264 instance Show QName where
265 showsPrec _p QName{..} =
266 (if TL.null $ unNamespace qNameSpace then id
267 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
268 ) . showsPrec 10 qNameLocal
269 instance IsString QName where
270 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
271 fromString full@('{':rest) =
272 case List.break (== '}') rest of
273 (_, "") -> error ("Invalid XML Clark notation: " <> show full)
274 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
275 fromString local = QName "" $ fromString local
276 instance Hashable QName
278 qName :: NCName -> QName
279 qName = QName (Namespace "")
287 } deriving (Eq, Ord, Functor)
288 instance (Show src, Show a) => Show (Sourced src a) where
289 showsPrec p Sourced{..} =
291 showsPrec 11 unSourced .
292 showString " @" . showsPrec 10 source
293 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
294 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
295 Sourced (FileRange fx bx ey :| lx) $
296 x<>fromPad (FilePos lines columns)<>y
298 lines = filePos_line by - filePos_line ex
299 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
301 -- ** Class 'FromPad'
302 class FromPad a where
303 fromPad :: FilePos -> a
304 instance FromPad T.Text where
305 fromPad FilePos{..} =
306 T.replicate filePos_line "\n" <>
307 T.replicate filePos_column " "
308 instance FromPad TL.Text where
309 fromPad FilePos{..} =
310 TL.replicate (fromIntegral filePos_line) "\n" <>
311 TL.replicate (fromIntegral filePos_column) " "
313 -- ** Class 'NoSource'
314 class NoSource src where
316 instance NoSource FileSource where
317 noSource = noSource :| []
318 instance NoSource FileRange where
319 noSource = FileRange "" filePos1 filePos1
321 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
322 mempty = sourced0 mempty
326 notSourced :: NoSource src => a -> Sourced src a
327 notSourced = Sourced noSource
329 -- * Type 'FileSource'
330 type FileSource = NonEmpty FileRange
332 -- ** Type 'FileRange'
335 { fileRange_file :: FilePath
336 , fileRange_begin :: FilePos
337 , fileRange_end :: FilePos
339 instance Default FileRange where
340 def = FileRange "" filePos1 filePos1
341 instance Show FileRange where
342 showsPrec _p FileRange{..} =
343 showString fileRange_file .
344 showChar '#' . showsPrec 10 fileRange_begin .
345 showChar '-' . showsPrec 10 fileRange_end
347 -- *** Type 'FilePos'
348 -- | Absolute text file position.
349 data FilePos = FilePos
350 { filePos_line :: {-# UNPACK #-} LineNum
351 , filePos_column :: {-# UNPACK #-} ColNum
353 instance Default FilePos where
355 instance Show FilePos where
356 showsPrec _p FilePos{..} =
357 showsPrec 11 filePos_line .
359 showsPrec 11 filePos_column
362 filePos1 = FilePos 1 1
364 -- **** Type 'LineNum'
367 -- **** Type 'ColNum'