1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
17 import Control.Applicative (Alternative(..))
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
48 type XML = TS.Tree (Sourced FileSource Node)
51 pattern Tree0 :: a -> TS.Tree a
52 pattern Tree0 a <- TS.Tree a (null -> True)
53 where Tree0 a = TS.Tree a Seq.empty
57 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
58 | NodeAttr QName -- ^ Node with a 'NodeText' child.
59 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
60 | NodeText EscapedText -- ^ Leaf.
61 | NodeComment TL.Text -- ^ Leaf.
62 | NodeCDATA TL.Text -- ^ Leaf.
63 deriving (Eq, Ord, Show)
65 -- ** Type 'EscapedText'
66 type EscapedText = [Escaped]
68 escapeText :: TL.Text -> EscapedText
70 case TL.span (`List.notElem` ("<>&'\""::String)) s of
71 (t, r) | TL.null t -> escape r
72 | otherwise -> EscapedPlain t : escape r
74 escape t = case TL.uncons t of
76 Just (c, cs) -> escapeChar c : escapeText cs
79 '<' -> EscapedEntityRef entityRef_lt
80 '>' -> EscapedEntityRef entityRef_gt
81 '&' -> EscapedEntityRef entityRef_amp
82 '\'' -> EscapedEntityRef entityRef_apos
83 '"' -> EscapedEntityRef entityRef_quot
84 _ -> EscapedPlain $ TL.singleton c
86 unescapeText :: EscapedText -> TL.Text
87 unescapeText = foldMap $ \case
89 EscapedEntityRef EntityRef{..} -> entityRef_value
90 EscapedCharRef (CharRef c) -> TL.singleton c
93 -- | 'EscapedText' lexemes.
95 = EscapedPlain TL.Text
96 | EscapedEntityRef EntityRef
97 | EscapedCharRef CharRef
98 deriving (Eq, Ord, Show)
100 -- *** Type 'EntityRef'
101 data EntityRef = EntityRef
102 { entityRef_name :: NCName
103 , entityRef_value :: TL.Text
104 } deriving (Eq, Ord, Show)
106 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
107 entityRef_lt = EntityRef (NCName "lt") "<"
108 entityRef_gt = EntityRef (NCName "gt") ">"
109 entityRef_amp = EntityRef (NCName "amp") "&"
110 entityRef_quot = EntityRef (NCName "quot") "\""
111 entityRef_apos = EntityRef (NCName "apos") "'"
113 -- *** Type 'CharRef'
114 newtype CharRef = CharRef Char
115 deriving (Eq, Ord, Show)
118 newtype Name = Name { unName :: TL.Text }
119 deriving (Eq, Ord, Hashable)
120 instance Show Name where
121 showsPrec _p = showString . TL.unpack . unName
122 instance IsString Name where
125 , XC.isXmlNameStartChar c
126 && all XC.isXmlNameChar cs
128 | otherwise = error "Invalid XML Name"
130 -- ** Type 'Namespace'
131 newtype Namespace = Namespace { unNamespace :: TL.Text }
132 deriving (Eq, Ord, Show, Hashable)
133 instance IsString Namespace where
135 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
136 then Namespace (fromString s)
137 else error $ "Invalid XML Namespace: " <> show s
139 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
140 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
141 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
142 xmlns_empty = Namespace ""
144 -- * Type 'Namespaces'
145 data Namespaces prefix = Namespaces
146 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
147 , namespaces_default :: Namespace
149 instance Default (Namespaces NCName) where
151 { namespaces_prefixes = HM.fromList
152 [ (xmlns_xml , "xml")
153 , (xmlns_xmlns, "xmlns")
155 , namespaces_default = ""
157 instance Default (Namespaces (Maybe NCName)) where
159 { namespaces_prefixes = HM.fromList
160 [ (xmlns_xml , Just "xml")
161 , (xmlns_xmlns, Just "xmlns")
163 , namespaces_default = ""
165 instance Semigroup (Namespaces NCName) where
167 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
168 , namespaces_default = namespaces_default x
170 instance Semigroup (Namespaces (Maybe NCName)) where
172 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
173 , namespaces_default = namespaces_default x
175 instance Monoid (Namespaces NCName) where
178 instance Monoid (Namespaces (Maybe NCName)) where
182 prefixifyQName :: Namespaces NCName -> QName -> PName
183 prefixifyQName Namespaces{..} QName{..} =
186 if qNameSpace == namespaces_default
188 else HM.lookup qNameSpace namespaces_prefixes
189 , pNameLocal = qNameLocal
193 -- | Non-colonized name.
194 newtype NCName = NCName { unNCName :: TL.Text }
195 deriving (Eq, Ord, Hashable)
196 instance Show NCName where
197 showsPrec _p = showString . TL.unpack . unNCName
198 instance IsString NCName where
200 fromMaybe (error "Invalid XML NCName") $
203 ncName :: TL.Text -> Maybe NCName
207 | XC.isXmlNCNameStartChar c
208 , TL.all XC.isXmlNCNameChar cs
212 poolNCNames :: [NCName]
214 [ NCName $ TL.pack ("ns"<>show i)
218 freshNCName :: HS.HashSet NCName -> NCName
219 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
221 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
222 freshifyNCName ns (NCName n) =
223 let ints = [1..] :: [Int] in
226 | suffix <- mempty : (show <$> ints)
227 , fresh <- [ NCName $ n <> TL.pack suffix]
228 , not $ fresh `HS.member` ns
234 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
235 , pNameLocal :: NCName -- ^ eg. "stylesheet"
236 } deriving (Eq, Ord, Generic)
237 instance Show PName where
238 showsPrec p PName{pNameSpace=Nothing, ..} =
239 showsPrec p pNameLocal
240 showsPrec _p PName{pNameSpace=Just p, ..} =
243 showsPrec 10 pNameLocal
244 instance IsString PName where
245 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
247 case List.break (== ':') s of
248 (_, "") -> PName Nothing $ fromString s
249 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
250 instance Hashable PName
252 pName :: NCName -> PName
253 pName = PName Nothing
259 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
260 , qNameLocal :: NCName -- ^ eg. "stylesheet"
261 } deriving (Eq, Ord, Generic)
262 instance Show QName where
263 showsPrec _p QName{..} =
264 (if TL.null $ unNamespace qNameSpace then id
265 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
266 ) . showsPrec 10 qNameLocal
267 instance IsString QName where
268 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
269 fromString full@('{':rest) =
270 case List.break (== '}') rest of
271 (_, "") -> error ("Invalid XML Clark notation: " <> show full)
272 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
273 fromString local = QName "" $ fromString local
274 instance Hashable QName
276 qName :: NCName -> QName
277 qName = QName (Namespace "")
285 } deriving (Eq, Ord, Functor)
286 instance (Show src, Show a) => Show (Sourced src a) where
287 showsPrec p Sourced{..} =
289 showsPrec 11 unSourced .
290 showString " @" . showsPrec 10 source
291 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
292 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
293 Sourced (FileRange fx bx ey :| lx) $
294 x<>fromPad (FilePos lines columns)<>y
296 lines = filePos_line by - filePos_line ex
297 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
299 -- ** Class 'FromPad'
300 class FromPad a where
301 fromPad :: FilePos -> a
302 instance FromPad T.Text where
303 fromPad FilePos{..} =
304 T.replicate filePos_line "\n" <>
305 T.replicate filePos_column " "
306 instance FromPad TL.Text where
307 fromPad FilePos{..} =
308 TL.replicate (fromIntegral filePos_line) "\n" <>
309 TL.replicate (fromIntegral filePos_column) " "
311 -- ** Class 'NoSource'
312 class NoSource src where
314 instance NoSource FileSource where
315 noSource = noSource :| []
316 instance NoSource FileRange where
317 noSource = FileRange "" filePos1 filePos1
319 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
320 mempty = sourced0 mempty
324 notSourced :: NoSource src => a -> Sourced src a
325 notSourced = Sourced noSource
327 -- * Type 'FileSource'
328 type FileSource = NonEmpty FileRange
330 -- ** Type 'FileRange'
333 { fileRange_file :: FilePath
334 , fileRange_begin :: FilePos
335 , fileRange_end :: FilePos
337 instance Default FileRange where
338 def = FileRange "" filePos1 filePos1
339 instance Show FileRange where
340 showsPrec _p FileRange{..} =
341 showString fileRange_file .
342 showChar '#' . showsPrec 10 fileRange_begin .
343 showChar '-' . showsPrec 10 fileRange_end
345 -- *** Type 'FilePos'
346 -- | Absolute text file position.
347 data FilePos = FilePos
348 { filePos_line :: {-# UNPACK #-} LineNum
349 , filePos_column :: {-# UNPACK #-} ColNum
351 instance Default FilePos where
353 instance Show FilePos where
354 showsPrec _p FilePos{..} =
355 showsPrec 11 filePos_line .
357 showsPrec 11 filePos_column
360 filePos1 = FilePos 1 1
362 -- **** Type 'LineNum'
365 -- **** Type 'ColNum'