1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
17 import Control.Applicative (Alternative(..))
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((+), error)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text.Lazy as TL
44 import qualified Data.TreeSeq.Strict as TS
47 type XML src = TS.Tree (Sourced src Node)
48 type XMLs src = Seq (XML src)
50 -- | Unify two 'XMLs', merging border 'NodeText's if any.
51 union :: Semigroup (Sourced src EscapedText) => XMLs src -> XMLs src -> XMLs src
53 case (Seq.viewr x, Seq.viewl y) of
54 (xs Seq.:> x0, y0 Seq.:< ys) ->
56 ( Tree0 (Sourced sx (NodeText tx))
57 , Tree0 (Sourced sy (NodeText ty)) ) ->
59 Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced sx tx <> Sourced sy ty) `union`
66 Semigroup (Sourced src EscapedText) =>
67 Foldable f => f (XMLs src) -> XMLs src
68 unions = foldl' union mempty
70 pattern Tree0 :: a -> TS.Tree a
71 pattern Tree0 a <- TS.Tree a (null -> True)
72 where Tree0 a = TS.Tree a Seq.empty
76 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
77 | NodeAttr QName -- ^ Node with a 'NodeText' child.
78 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
79 | NodeText EscapedText -- ^ Leaf.
80 | NodeComment TL.Text -- ^ Leaf.
81 | NodeCDATA TL.Text -- ^ Leaf.
82 deriving (Eq, Ord, Show)
84 -- ** Type 'EscapedText'
85 newtype EscapedText = EscapedText (Seq Escaped)
86 deriving (Eq, Ord, Show)
88 escapeText :: TL.Text -> EscapedText
91 case TL.span (`List.notElem` ("<>&'\""::String)) s of
92 (t, r) | TL.null t -> escape r
93 | otherwise -> EscapedPlain t Seq.<| escape r
95 escape t = case TL.uncons t of
97 Just (c, cs) -> escapeChar c Seq.<| et where EscapedText et = escapeText cs
99 escapeChar :: Char -> Escaped
102 '<' -> EscapedEntityRef entityRef_lt
103 '>' -> EscapedEntityRef entityRef_gt
104 '&' -> EscapedEntityRef entityRef_amp
105 '\'' -> EscapedEntityRef entityRef_apos
106 '"' -> EscapedEntityRef entityRef_quot
107 _ -> EscapedPlain $ TL.singleton c
109 unescapeText :: EscapedText -> TL.Text
110 unescapeText (EscapedText et) = (`foldMap` et) $ \case
112 EscapedEntityRef EntityRef{..} -> entityRef_value
113 EscapedCharRef (CharRef c) -> TL.singleton c
115 instance Semigroup EscapedText where
116 EscapedText x <> EscapedText y =
118 (xl Seq.:|> EscapedPlain xr, EscapedPlain yl Seq.:<|yr) ->
119 (EscapedText $ xl Seq.|> EscapedPlain (xr<>yl)) <> EscapedText yr
120 _ -> EscapedText $ x <> y
121 instance Monoid EscapedText where
122 mempty = EscapedText mempty
125 -- *** Type 'Escaped'
126 -- | 'EscapedText' lexemes.
128 = EscapedPlain TL.Text
129 | EscapedEntityRef EntityRef
130 | EscapedCharRef CharRef
131 deriving (Eq, Ord, Show)
133 -- *** Type 'EntityRef'
134 data EntityRef = EntityRef
135 { entityRef_name :: NCName
136 , entityRef_value :: TL.Text
137 } deriving (Eq, Ord, Show)
139 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
140 entityRef_lt = EntityRef (NCName "lt") "<"
141 entityRef_gt = EntityRef (NCName "gt") ">"
142 entityRef_amp = EntityRef (NCName "amp") "&"
143 entityRef_quot = EntityRef (NCName "quot") "\""
144 entityRef_apos = EntityRef (NCName "apos") "'"
146 -- *** Type 'CharRef'
147 newtype CharRef = CharRef Char
148 deriving (Eq, Ord, Show)
151 newtype Name = Name { unName :: TL.Text }
152 deriving (Eq, Ord, Hashable)
153 instance Show Name where
154 showsPrec _p = showString . TL.unpack . unName
155 instance IsString Name where
158 , XC.isXmlNameStartChar c
159 && all XC.isXmlNameChar cs
161 | otherwise = error $ "Invalid XML Name: "<>show s
163 -- ** Type 'Namespace'
164 newtype Namespace = Namespace { unNamespace :: TL.Text }
165 deriving (Eq, Ord, Show, Hashable)
166 instance IsString Namespace where
168 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
169 then Namespace (fromString s)
170 else error $ "Invalid XML Namespace: "<>show s
172 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
173 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
174 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
175 xmlns_empty = Namespace ""
177 -- * Type 'Namespaces'
178 data Namespaces prefix = Namespaces
179 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
180 , namespaces_default :: Namespace
182 instance Default (Namespaces NCName) where
184 { namespaces_prefixes = HM.fromList
185 [ (xmlns_xml , "xml")
186 , (xmlns_xmlns, "xmlns")
188 , namespaces_default = ""
190 instance Default (Namespaces (Maybe NCName)) where
192 { namespaces_prefixes = HM.fromList
193 [ (xmlns_xml , Just "xml")
194 , (xmlns_xmlns, Just "xmlns")
196 , namespaces_default = ""
198 instance Semigroup (Namespaces NCName) where
200 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
201 , namespaces_default = namespaces_default x
203 instance Semigroup (Namespaces (Maybe NCName)) where
205 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
206 , namespaces_default = namespaces_default x
208 instance Monoid (Namespaces NCName) where
211 instance Monoid (Namespaces (Maybe NCName)) where
215 prefixifyQName :: Namespaces NCName -> QName -> PName
216 prefixifyQName Namespaces{..} QName{..} =
219 if qNameSpace == namespaces_default
221 else HM.lookup qNameSpace namespaces_prefixes
222 , pNameLocal = qNameLocal
226 -- | Non-colonized name.
227 newtype NCName = NCName { unNCName :: TL.Text }
228 deriving (Eq, Ord, Hashable)
229 instance Show NCName where
230 showsPrec _p = showString . TL.unpack . unNCName
231 instance IsString NCName where
233 fromMaybe (error $ "Invalid XML NCName: "<>show s) $
236 ncName :: TL.Text -> Maybe NCName
240 | XC.isXmlNCNameStartChar c
241 , TL.all XC.isXmlNCNameChar cs
245 poolNCNames :: [NCName]
247 [ NCName $ TL.pack ("ns"<>show i)
251 freshNCName :: HS.HashSet NCName -> NCName
252 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
254 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
255 freshifyNCName ns (NCName n) =
256 let ints = [1..] :: [Int] in
259 | suffix <- mempty : (show <$> ints)
260 , fresh <- [ NCName $ n <> TL.pack suffix]
261 , not $ fresh `HS.member` ns
267 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
268 , pNameLocal :: NCName -- ^ eg. "stylesheet"
269 } deriving (Eq, Ord, Generic)
270 instance Show PName where
271 showsPrec p PName{pNameSpace=Nothing, ..} =
272 showsPrec p pNameLocal
273 showsPrec _p PName{pNameSpace=Just p, ..} =
276 showsPrec 10 pNameLocal
277 instance IsString PName where
278 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
280 case List.break (== ':') s of
281 (_, "") -> PName Nothing $ fromString s
282 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
283 instance Hashable PName
285 pName :: NCName -> PName
286 pName = PName Nothing
292 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
293 , qNameLocal :: NCName -- ^ eg. "stylesheet"
294 } deriving (Eq, Ord, Generic)
295 instance Show QName where
296 showsPrec _p QName{..} =
297 (if TL.null $ unNamespace qNameSpace then id
298 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
299 ) . showsPrec 10 qNameLocal
300 instance IsString QName where
301 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
302 fromString full@('{':rest) =
303 case List.break (== '}') rest of
304 (_, "") -> error $ "Invalid XML Clark notation: "<>show full
305 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
306 fromString local = QName "" $ fromString local
307 instance Hashable QName
309 qName :: NCName -> QName
310 qName = QName (Namespace "")
318 } deriving (Eq, Ord, Functor)
319 instance (Show src, Show a) => Show (Sourced src a) where
320 showsPrec p Sourced{..} =
322 showsPrec 11 unSourced .
323 showString " @" . showsPrec 10 source
324 instance Semigroup a => Semigroup (Sourced (FileSource Offset) a) where
326 (Sourced rx@(FileRange xf xb xe :| xs) x)
327 (Sourced (FileRange yf yb ye :| _ys) y)
328 | xf == yf && xe == yb = Sourced (FileRange xf xb ye :| xs) $ x<>y
329 | otherwise = Sourced rx (x<>y)
331 instance (FromPad a, Semigroup a) => Semigroup (Sourced (FileSource LineCol) a) where
333 (Sourced rx@(FileRange xf xb xe :| xs) x)
334 (Sourced (FileRange yf yb ye :| _ys) y)
335 | xf == yf = Sourced (FileRange xf xb ye :| xs) $ x<>fromPad (LineColumn l c)<>y
336 | otherwise = Sourced rx (x<>y)
338 l = lineNum yb - lineNum xe
339 c = colNum yb - colNum (if l <= 0 then xe else xb)
341 -- ** Class 'FromPad'
342 class FromPad a where
343 fromPad :: LineColumn -> a
344 instance FromPad T.Text where
345 fromPad LineColumn{..} =
346 T.replicate lineNum "\n" <>
347 T.replicate colNum " "
348 instance FromPad TL.Text where
349 fromPad LineColumn{..} =
350 TL.replicate (fromIntegral lineNum) "\n" <>
351 TL.replicate (fromIntegral colNum) " "
352 instance FromPad EscapedText where
353 fromPad = EscapedText . pure . fromPad
354 instance FromPad Escaped where
355 fromPad = EscapedPlain . fromPad
358 -- ** Class 'NoSource'
359 class NoSource src where
361 instance NoSource pos => NoSource (FileSource pos) where
362 noSource = noSource :| []
363 instance NoSource pos => NoSource (FileRange pos) where
364 noSource = FileRange "" noSource noSource
365 instance NoSource Offset where
366 noSource = Offset def
367 instance NoSource LineColumn where
368 noSource = LineColumn def def
370 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
371 mempty = sourced0 mempty
375 notSourced :: NoSource src => a -> Sourced src a
376 notSourced = Sourced noSource
378 -- * Type 'FileSource'
379 type FileSource pos = NonEmpty (FileRange pos)
381 -- ** Type 'FileSourced'
382 type FileSourced = Sourced (FileSource Offset)
384 -- ** Type 'FileRange'
387 { fileRange_file :: FilePath
388 , fileRange_begin :: pos
389 , fileRange_end :: pos
391 instance Default (FileRange Offset) where
392 def = FileRange "" def def
393 instance Default (FileRange LineColumn) where
394 def = FileRange "" def def
395 instance Show (FileRange Offset) where
396 showsPrec _p FileRange{..} =
397 showString fileRange_file .
398 showChar '@' . showsPrec 10 fileRange_begin .
399 showChar '-' . showsPrec 10 fileRange_end
400 instance Show (FileRange LineColumn) where
401 showsPrec _p FileRange{..} =
402 showString fileRange_file .
403 showChar '#' . showsPrec 10 fileRange_begin .
404 showChar '-' . showsPrec 10 fileRange_end
407 newtype Offset = Offset Int
409 instance Show Offset where
410 showsPrec p (Offset o) = showsPrec p o
411 instance Default Offset where
413 instance Semigroup Offset where
414 Offset x <> Offset y = Offset (x+y)
415 instance Monoid Offset where
419 -- *** Type 'LineColumn'
420 -- | Absolute text file position.
421 data LineColumn = LineColumn
422 { lineNum :: {-# UNPACK #-} Offset
423 , colNum :: {-# UNPACK #-} Offset
425 instance Default LineColumn where
426 def = LineColumn def def
427 instance Show LineColumn where
428 showsPrec _p LineColumn{..} =
429 showsPrec 11 lineNum .
433 filePos1 :: LineColumn