1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
17 import Control.Applicative (Applicative(..), Alternative(..))
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((+), (-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
48 type XML = TS.Tree (Sourced (FileSource Offset) Node)
51 -- | Unify two 'XMLs', merging border 'NodeText's if any.
52 union :: XMLs -> XMLs -> XMLs
54 case (Seq.viewr x, Seq.viewl y) of
55 (xs Seq.:> x0, y0 Seq.:< ys) ->
57 ( Tree0 (Sourced sx (NodeText tx))
58 , Tree0 (Sourced sy (NodeText ty)) ) ->
60 Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced sx tx <> Sourced sy ty) `union`
66 unions :: Foldable f => f XMLs -> XMLs
67 unions = foldl' union mempty
69 pattern Tree0 :: a -> TS.Tree a
70 pattern Tree0 a <- TS.Tree a (null -> True)
71 where Tree0 a = TS.Tree a Seq.empty
75 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
76 | NodeAttr QName -- ^ Node with a 'NodeText' child.
77 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
78 | NodeText EscapedText -- ^ Leaf.
79 | NodeComment TL.Text -- ^ Leaf.
80 | NodeCDATA TL.Text -- ^ Leaf.
81 deriving (Eq, Ord, Show)
83 -- ** Type 'EscapedText'
84 newtype EscapedText = EscapedText (Seq Escaped)
85 deriving (Eq, Ord, Show)
87 escapeText :: TL.Text -> EscapedText
90 case TL.span (`List.notElem` ("<>&'\""::String)) s of
91 (t, r) | TL.null t -> escape r
92 | otherwise -> EscapedPlain t Seq.<| escape r
94 escape t = case TL.uncons t of
96 Just (c, cs) -> escapeChar c Seq.<| et where EscapedText et = escapeText cs
98 escapeChar :: Char -> Escaped
101 '<' -> EscapedEntityRef entityRef_lt
102 '>' -> EscapedEntityRef entityRef_gt
103 '&' -> EscapedEntityRef entityRef_amp
104 '\'' -> EscapedEntityRef entityRef_apos
105 '"' -> EscapedEntityRef entityRef_quot
106 _ -> EscapedPlain $ TL.singleton c
108 unescapeText :: EscapedText -> TL.Text
109 unescapeText (EscapedText et) = (`foldMap` et) $ \case
111 EscapedEntityRef EntityRef{..} -> entityRef_value
112 EscapedCharRef (CharRef c) -> TL.singleton c
114 instance Semigroup EscapedText where
115 EscapedText x <> EscapedText y =
117 (xl Seq.:|> EscapedPlain xr, EscapedPlain yl Seq.:<|yr) ->
118 (EscapedText $ xl Seq.|> EscapedPlain (xr<>yl)) <> EscapedText yr
119 _ -> EscapedText $ x <> y
120 instance Monoid EscapedText where
121 mempty = EscapedText mempty
124 -- *** Type 'Escaped'
125 -- | 'EscapedText' lexemes.
127 = EscapedPlain TL.Text
128 | EscapedEntityRef EntityRef
129 | EscapedCharRef CharRef
130 deriving (Eq, Ord, Show)
132 -- *** Type 'EntityRef'
133 data EntityRef = EntityRef
134 { entityRef_name :: NCName
135 , entityRef_value :: TL.Text
136 } deriving (Eq, Ord, Show)
138 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
139 entityRef_lt = EntityRef (NCName "lt") "<"
140 entityRef_gt = EntityRef (NCName "gt") ">"
141 entityRef_amp = EntityRef (NCName "amp") "&"
142 entityRef_quot = EntityRef (NCName "quot") "\""
143 entityRef_apos = EntityRef (NCName "apos") "'"
145 -- *** Type 'CharRef'
146 newtype CharRef = CharRef Char
147 deriving (Eq, Ord, Show)
150 newtype Name = Name { unName :: TL.Text }
151 deriving (Eq, Ord, Hashable)
152 instance Show Name where
153 showsPrec _p = showString . TL.unpack . unName
154 instance IsString Name where
157 , XC.isXmlNameStartChar c
158 && all XC.isXmlNameChar cs
160 | otherwise = error $ "Invalid XML Name: "<>show s
162 -- ** Type 'Namespace'
163 newtype Namespace = Namespace { unNamespace :: TL.Text }
164 deriving (Eq, Ord, Show, Hashable)
165 instance IsString Namespace where
167 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
168 then Namespace (fromString s)
169 else error $ "Invalid XML Namespace: "<>show s
171 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
172 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
173 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
174 xmlns_empty = Namespace ""
176 -- * Type 'Namespaces'
177 data Namespaces prefix = Namespaces
178 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
179 , namespaces_default :: Namespace
181 instance Default (Namespaces NCName) where
183 { namespaces_prefixes = HM.fromList
184 [ (xmlns_xml , "xml")
185 , (xmlns_xmlns, "xmlns")
187 , namespaces_default = ""
189 instance Default (Namespaces (Maybe NCName)) where
191 { namespaces_prefixes = HM.fromList
192 [ (xmlns_xml , Just "xml")
193 , (xmlns_xmlns, Just "xmlns")
195 , namespaces_default = ""
197 instance Semigroup (Namespaces NCName) where
199 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
200 , namespaces_default = namespaces_default x
202 instance Semigroup (Namespaces (Maybe NCName)) where
204 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
205 , namespaces_default = namespaces_default x
207 instance Monoid (Namespaces NCName) where
210 instance Monoid (Namespaces (Maybe NCName)) where
214 prefixifyQName :: Namespaces NCName -> QName -> PName
215 prefixifyQName Namespaces{..} QName{..} =
218 if qNameSpace == namespaces_default
220 else HM.lookup qNameSpace namespaces_prefixes
221 , pNameLocal = qNameLocal
225 -- | Non-colonized name.
226 newtype NCName = NCName { unNCName :: TL.Text }
227 deriving (Eq, Ord, Hashable)
228 instance Show NCName where
229 showsPrec _p = showString . TL.unpack . unNCName
230 instance IsString NCName where
232 fromMaybe (error $ "Invalid XML NCName: "<>show s) $
235 ncName :: TL.Text -> Maybe NCName
239 | XC.isXmlNCNameStartChar c
240 , TL.all XC.isXmlNCNameChar cs
244 poolNCNames :: [NCName]
246 [ NCName $ TL.pack ("ns"<>show i)
250 freshNCName :: HS.HashSet NCName -> NCName
251 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
253 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
254 freshifyNCName ns (NCName n) =
255 let ints = [1..] :: [Int] in
258 | suffix <- mempty : (show <$> ints)
259 , fresh <- [ NCName $ n <> TL.pack suffix]
260 , not $ fresh `HS.member` ns
266 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
267 , pNameLocal :: NCName -- ^ eg. "stylesheet"
268 } deriving (Eq, Ord, Generic)
269 instance Show PName where
270 showsPrec p PName{pNameSpace=Nothing, ..} =
271 showsPrec p pNameLocal
272 showsPrec _p PName{pNameSpace=Just p, ..} =
275 showsPrec 10 pNameLocal
276 instance IsString PName where
277 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
279 case List.break (== ':') s of
280 (_, "") -> PName Nothing $ fromString s
281 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
282 instance Hashable PName
284 pName :: NCName -> PName
285 pName = PName Nothing
291 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
292 , qNameLocal :: NCName -- ^ eg. "stylesheet"
293 } deriving (Eq, Ord, Generic)
294 instance Show QName where
295 showsPrec _p QName{..} =
296 (if TL.null $ unNamespace qNameSpace then id
297 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
298 ) . showsPrec 10 qNameLocal
299 instance IsString QName where
300 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
301 fromString full@('{':rest) =
302 case List.break (== '}') rest of
303 (_, "") -> error $ "Invalid XML Clark notation: "<>show full
304 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
305 fromString local = QName "" $ fromString local
306 instance Hashable QName
308 qName :: NCName -> QName
309 qName = QName (Namespace "")
317 } deriving (Eq, Ord, Functor)
318 instance (Show src, Show a) => Show (Sourced src a) where
319 showsPrec p Sourced{..} =
321 showsPrec 11 unSourced .
322 showString " @" . showsPrec 10 source
323 instance Semigroup a => Semigroup (Sourced (FileSource Offset) a) where
325 (Sourced rx@(FileRange xf xb xe :| xs) x)
326 (Sourced (FileRange yf yb ye :| _ys) y)
327 | xf == yf && xe == yb = Sourced (FileRange xf xb ye :| xs) $ x<>y
328 | otherwise = Sourced rx (x<>y)
330 instance (FromPad a, Semigroup a) => Semigroup (Sourced (FileSource LineCol) a) where
332 (Sourced rx@(FileRange xf xb xe :| xs) x)
333 (Sourced (FileRange yf yb ye :| _ys) y)
334 | xf == yf = Sourced (FileRange xf xb ye :| xs) $ x<>fromPad (LineColumn l c)<>y
335 | otherwise = Sourced rx (x<>y)
337 l = lineNum yb - lineNum xe
338 c = colNum yb - colNum (if l <= 0 then xe else xb)
340 -- ** Class 'FromPad'
341 class FromPad a where
342 fromPad :: LineColumn -> a
343 instance FromPad T.Text where
344 fromPad LineColumn{..} =
345 T.replicate lineNum "\n" <>
346 T.replicate colNum " "
347 instance FromPad TL.Text where
348 fromPad LineColumn{..} =
349 TL.replicate (fromIntegral lineNum) "\n" <>
350 TL.replicate (fromIntegral colNum) " "
351 instance FromPad EscapedText where
352 fromPad = EscapedText . pure . fromPad
353 instance FromPad Escaped where
354 fromPad = EscapedPlain . fromPad
357 -- ** Class 'NoSource'
358 class NoSource src where
360 instance NoSource pos => NoSource (FileSource pos) where
361 noSource = noSource :| []
362 instance NoSource pos => NoSource (FileRange pos) where
363 noSource = FileRange "" noSource noSource
364 instance NoSource Offset where
365 noSource = Offset def
366 instance NoSource LineColumn where
367 noSource = LineColumn def def
369 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
370 mempty = sourced0 mempty
374 notSourced :: NoSource src => a -> Sourced src a
375 notSourced = Sourced noSource
377 -- * Type 'FileSource'
378 type FileSource pos = NonEmpty (FileRange pos)
380 -- ** Type 'FileSourced'
381 type FileSourced = Sourced (FileSource Offset)
383 -- ** Type 'FileRange'
386 { fileRange_file :: FilePath
387 , fileRange_begin :: pos
388 , fileRange_end :: pos
390 instance Default (FileRange Offset) where
391 def = FileRange "" def def
392 instance Default (FileRange LineColumn) where
393 def = FileRange "" def def
394 instance Show (FileRange Offset) where
395 showsPrec _p FileRange{..} =
396 showString fileRange_file .
397 showChar '@' . showsPrec 10 fileRange_begin .
398 showChar '-' . showsPrec 10 fileRange_end
399 instance Show (FileRange LineColumn) where
400 showsPrec _p FileRange{..} =
401 showString fileRange_file .
402 showChar '#' . showsPrec 10 fileRange_begin .
403 showChar '-' . showsPrec 10 fileRange_end
406 newtype Offset = Offset Int
408 instance Show Offset where
409 showsPrec p (Offset o) = showsPrec p o
410 instance Default Offset where
412 instance Semigroup Offset where
413 Offset x <> Offset y = Offset (x+y)
414 instance Monoid Offset where
418 -- *** Type 'LineColumn'
419 -- | Absolute text file position.
420 data LineColumn = LineColumn
421 { lineNum :: {-# UNPACK #-} Offset
422 , colNum :: {-# UNPACK #-} Offset
424 instance Default LineColumn where
425 def = LineColumn def def
426 instance Show LineColumn where
427 showsPrec _p LineColumn{..} =
428 showsPrec 11 lineNum .
432 filePos1 :: LineColumn