1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
17 import Control.Applicative (Alternative(..))
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..))
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
48 type XML = TS.Tree (Sourced FileSource Node)
51 pattern Tree0 :: a -> TS.Tree a
52 pattern Tree0 a <- TS.Tree a (null -> True)
53 where Tree0 a = TS.Tree a Seq.empty
57 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
58 | NodeAttr QName -- ^ Node with a 'NodeText' child.
59 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
60 | NodeText EscapedText -- ^ Leaf.
61 | NodeComment TL.Text -- ^ Leaf.
62 | NodeCDATA TL.Text -- ^ Leaf.
63 deriving (Eq, Ord, Show)
65 -- ** Type 'EscapedText'
66 type EscapedText = [Escaped]
68 escapeText :: TL.Text -> EscapedText
70 case TL.span (`List.notElem` ("<>&'\""::String)) s of
71 (t, r) | TL.null t -> escape r
72 | otherwise -> EscapedPlain t : escape r
74 escape t = case TL.uncons t of
76 Just (c, cs) -> escapeChar c : escapeText cs
79 '<' -> EscapedEntityRef entityRef_lt
80 '>' -> EscapedEntityRef entityRef_gt
81 '&' -> EscapedEntityRef entityRef_amp
82 '\'' -> EscapedEntityRef entityRef_apos
83 '"' -> EscapedEntityRef entityRef_quot
84 _ -> EscapedPlain $ TL.singleton c
86 unescapeText :: EscapedText -> TL.Text
87 unescapeText = foldMap $ \case
89 EscapedEntityRef EntityRef{..} -> entityRef_value
90 EscapedCharRef (CharRef c) -> TL.singleton c
93 -- | 'EscapedText' lexemes.
95 = EscapedPlain TL.Text
96 | EscapedEntityRef EntityRef
97 | EscapedCharRef CharRef
98 deriving (Eq, Ord, Show)
100 -- *** Type 'EntityRef'
101 data EntityRef = EntityRef
102 { entityRef_name :: NCName
103 , entityRef_value :: TL.Text
104 } deriving (Eq, Ord, Show)
106 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
107 entityRef_lt = EntityRef (NCName "lt") "<"
108 entityRef_gt = EntityRef (NCName "gt") ">"
109 entityRef_amp = EntityRef (NCName "amp") "&"
110 entityRef_quot = EntityRef (NCName "quot") "\""
111 entityRef_apos = EntityRef (NCName "apos") "'"
113 -- *** Type 'CharRef'
114 newtype CharRef = CharRef Char
115 deriving (Eq, Ord, Show)
118 newtype Name = Name { unName :: TL.Text }
119 deriving (Eq, Ord, Hashable)
120 instance Show Name where
121 showsPrec _p = showString . TL.unpack . unName
122 instance IsString Name where
125 , XC.isXmlNameStartChar c
126 && all XC.isXmlNameChar cs
128 | otherwise = error "Invalid XML Name"
130 -- ** Type 'Namespace'
131 newtype Namespace = Namespace { unNamespace :: TL.Text }
132 deriving (Eq, Ord, Show, Hashable)
133 instance IsString Namespace where
135 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
136 then Namespace (fromString s)
137 else error $ "Invalid XML Namespace: " <> show s
139 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
140 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
141 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
142 xmlns_empty = Namespace ""
144 -- * Type 'Namespaces'
145 data Namespaces prefix = Namespaces
146 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
147 , namespaces_default :: Namespace
149 instance Default (Namespaces NCName) where
151 { namespaces_prefixes = HM.fromList
152 [ (xmlns_xml , "xml")
153 , (xmlns_xmlns, "xmlns")
155 , namespaces_default = ""
157 instance Default (Namespaces (Maybe NCName)) where
159 { namespaces_prefixes = HM.fromList
160 [ (xmlns_xml , Just "xml")
161 , (xmlns_xmlns, Just "xmlns")
163 , namespaces_default = ""
165 instance Semigroup (Namespaces NCName) where
167 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
168 , namespaces_default = namespaces_default x
170 instance Semigroup (Namespaces (Maybe NCName)) where
172 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
173 , namespaces_default = namespaces_default x
175 instance Monoid (Namespaces NCName) where
178 instance Monoid (Namespaces (Maybe NCName)) where
182 prefixifyQName :: Namespaces NCName -> QName -> PName
183 prefixifyQName Namespaces{..} QName{..} =
186 if qNameSpace == namespaces_default
188 else HM.lookup qNameSpace namespaces_prefixes
189 , pNameLocal = qNameLocal
193 -- | Non-colonized name.
194 newtype NCName = NCName { unNCName :: TL.Text }
195 deriving (Eq, Ord, Hashable)
196 instance Show NCName where
197 showsPrec _p = showString . TL.unpack . unNCName
198 instance IsString NCName where
201 , XC.isXmlNCNameStartChar c
202 && all XC.isXmlNCNameChar cs
204 | otherwise = error "Invalid XML NCName"
206 poolNCNames :: [NCName]
208 [ NCName $ TL.pack ("ns"<>show i)
212 freshNCName :: HS.HashSet NCName -> NCName
213 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
215 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
216 freshifyNCName ns (NCName n) =
217 let ints = [1..] :: [Int] in
220 | suffix <- mempty : (show <$> ints)
221 , fresh <- [ NCName $ n <> TL.pack suffix]
222 , not $ fresh `HS.member` ns
228 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
229 , pNameLocal :: NCName -- ^ eg. "stylesheet"
230 } deriving (Eq, Ord, Generic)
231 instance Show PName where
232 showsPrec p PName{pNameSpace=Nothing, ..} =
233 showsPrec p pNameLocal
234 showsPrec _p PName{pNameSpace=Just p, ..} =
237 showsPrec 10 pNameLocal
238 instance IsString PName where
239 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
241 case List.break (== ':') s of
242 (_, "") -> PName Nothing $ fromString s
243 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
244 instance Hashable PName
246 pName :: NCName -> PName
247 pName = PName Nothing
253 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
254 , qNameLocal :: NCName -- ^ eg. "stylesheet"
255 } deriving (Eq, Ord, Generic)
256 instance Show QName where
257 showsPrec _p QName{..} =
258 (if TL.null $ unNamespace qNameSpace then id
259 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
260 ) . showsPrec 10 qNameLocal
261 instance IsString QName where
262 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
263 fromString full@('{':rest) =
264 case List.break (== '}') rest of
265 (_, "") -> error ("Invalid XML Clark notation: " <> show full)
266 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
267 fromString local = QName "" $ fromString local
268 instance Hashable QName
270 qName :: NCName -> QName
271 qName = QName (Namespace "")
279 } deriving (Eq, Ord, Functor)
280 instance (Show src, Show a) => Show (Sourced src a) where
281 showsPrec p Sourced{..} =
283 showsPrec 11 unSourced .
284 showString " @" . showsPrec 10 source
285 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
286 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
287 Sourced (FileRange fx bx ey :| lx) $
288 x<>fromPad (FilePos lines columns)<>y
290 lines = filePos_line by - filePos_line ex
291 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
293 -- ** Class 'FromPad'
294 class FromPad a where
295 fromPad :: FilePos -> a
296 instance FromPad T.Text where
297 fromPad FilePos{..} =
298 T.replicate filePos_line "\n" <>
299 T.replicate filePos_column " "
300 instance FromPad TL.Text where
301 fromPad FilePos{..} =
302 TL.replicate (fromIntegral filePos_line) "\n" <>
303 TL.replicate (fromIntegral filePos_column) " "
305 -- ** Class 'NoSource'
306 class NoSource src where
308 instance NoSource FileSource where
309 noSource = noSource :| []
310 instance NoSource FileRange where
311 noSource = FileRange "" filePos1 filePos1
313 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
314 mempty = sourced0 mempty
318 notSourced :: NoSource src => a -> Sourced src a
319 notSourced = Sourced noSource
321 -- * Type 'FileSource'
322 type FileSource = NonEmpty FileRange
324 -- ** Type 'FileRange'
327 { fileRange_file :: FilePath
328 , fileRange_begin :: FilePos
329 , fileRange_end :: FilePos
331 instance Default FileRange where
332 def = FileRange "" filePos1 filePos1
333 instance Show FileRange where
334 showsPrec _p FileRange{..} =
335 showString fileRange_file .
336 showChar '#' . showsPrec 10 fileRange_begin .
337 showChar '-' . showsPrec 10 fileRange_end
339 -- *** Type 'FilePos'
340 -- | Absolute text file position.
341 data FilePos = FilePos
342 { filePos_line :: {-# UNPACK #-} LineNum
343 , filePos_column :: {-# UNPACK #-} ColNum
345 instance Default FilePos where
347 instance Show FilePos where
348 showsPrec _p FilePos{..} =
349 showsPrec 11 filePos_line .
351 showsPrec 11 filePos_column
354 filePos1 = FilePos 1 1
356 -- **** Type 'LineNum'
359 -- **** Type 'ColNum'