1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
17 import Control.Applicative (Applicative(..), Alternative(..))
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
48 type XML = TS.Tree (Sourced FileSource Node)
51 -- | Unify two 'XMLs', merging border 'NodeText's if any.
52 union :: XMLs -> XMLs -> XMLs
54 case (Seq.viewr x, Seq.viewl y) of
55 (xs Seq.:> x0, y0 Seq.:< ys) ->
57 ( Tree0 (Sourced ssx@(FileRange {fileRange_file=fx}:|_sx) (NodeText tx))
58 , Tree0 (Sourced ssy@(FileRange {fileRange_file=fy}:|_sy) (NodeText ty)) ) | fx == fy ->
60 Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced ssx tx <> Sourced ssy ty) `union`
66 unions :: Foldable f => f XMLs -> XMLs
67 unions = foldl' union mempty
69 pattern Tree0 :: a -> TS.Tree a
70 pattern Tree0 a <- TS.Tree a (null -> True)
71 where Tree0 a = TS.Tree a Seq.empty
75 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
76 | NodeAttr QName -- ^ Node with a 'NodeText' child.
77 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
78 | NodeText EscapedText -- ^ Leaf.
79 | NodeComment TL.Text -- ^ Leaf.
80 | NodeCDATA TL.Text -- ^ Leaf.
81 deriving (Eq, Ord, Show)
83 -- ** Type 'EscapedText'
84 newtype EscapedText = EscapedText (Seq Escaped)
85 deriving (Eq, Ord, Show)
87 escapeText :: TL.Text -> EscapedText
90 case TL.span (`List.notElem` ("<>&'\""::String)) s of
91 (t, r) | TL.null t -> escape r
92 | otherwise -> EscapedPlain t Seq.<| escape r
94 escape t = case TL.uncons t of
96 Just (c, cs) -> escapeChar c Seq.<| et where EscapedText et = escapeText cs
98 escapeChar :: Char -> Escaped
101 '<' -> EscapedEntityRef entityRef_lt
102 '>' -> EscapedEntityRef entityRef_gt
103 '&' -> EscapedEntityRef entityRef_amp
104 '\'' -> EscapedEntityRef entityRef_apos
105 '"' -> EscapedEntityRef entityRef_quot
106 _ -> EscapedPlain $ TL.singleton c
108 unescapeText :: EscapedText -> TL.Text
109 unescapeText (EscapedText et) = (`foldMap` et) $ \case
111 EscapedEntityRef EntityRef{..} -> entityRef_value
112 EscapedCharRef (CharRef c) -> TL.singleton c
114 instance Semigroup EscapedText where
115 EscapedText x <> EscapedText y =
117 (xl Seq.:|> EscapedPlain xr, EscapedPlain yl Seq.:<|yr) ->
118 (EscapedText $ xl Seq.|> EscapedPlain (xr<>yl)) <> EscapedText yr
119 _ -> EscapedText $ x <> y
120 instance Monoid EscapedText where
121 mempty = EscapedText mempty
124 -- *** Type 'Escaped'
125 -- | 'EscapedText' lexemes.
127 = EscapedPlain TL.Text
128 | EscapedEntityRef EntityRef
129 | EscapedCharRef CharRef
130 deriving (Eq, Ord, Show)
132 -- *** Type 'EntityRef'
133 data EntityRef = EntityRef
134 { entityRef_name :: NCName
135 , entityRef_value :: TL.Text
136 } deriving (Eq, Ord, Show)
138 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
139 entityRef_lt = EntityRef (NCName "lt") "<"
140 entityRef_gt = EntityRef (NCName "gt") ">"
141 entityRef_amp = EntityRef (NCName "amp") "&"
142 entityRef_quot = EntityRef (NCName "quot") "\""
143 entityRef_apos = EntityRef (NCName "apos") "'"
145 -- *** Type 'CharRef'
146 newtype CharRef = CharRef Char
147 deriving (Eq, Ord, Show)
150 newtype Name = Name { unName :: TL.Text }
151 deriving (Eq, Ord, Hashable)
152 instance Show Name where
153 showsPrec _p = showString . TL.unpack . unName
154 instance IsString Name where
157 , XC.isXmlNameStartChar c
158 && all XC.isXmlNameChar cs
160 | otherwise = error $ "Invalid XML Name: "<>show s
162 -- ** Type 'Namespace'
163 newtype Namespace = Namespace { unNamespace :: TL.Text }
164 deriving (Eq, Ord, Show, Hashable)
165 instance IsString Namespace where
167 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
168 then Namespace (fromString s)
169 else error $ "Invalid XML Namespace: "<>show s
171 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
172 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
173 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
174 xmlns_empty = Namespace ""
176 -- * Type 'Namespaces'
177 data Namespaces prefix = Namespaces
178 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
179 , namespaces_default :: Namespace
181 instance Default (Namespaces NCName) where
183 { namespaces_prefixes = HM.fromList
184 [ (xmlns_xml , "xml")
185 , (xmlns_xmlns, "xmlns")
187 , namespaces_default = ""
189 instance Default (Namespaces (Maybe NCName)) where
191 { namespaces_prefixes = HM.fromList
192 [ (xmlns_xml , Just "xml")
193 , (xmlns_xmlns, Just "xmlns")
195 , namespaces_default = ""
197 instance Semigroup (Namespaces NCName) where
199 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
200 , namespaces_default = namespaces_default x
202 instance Semigroup (Namespaces (Maybe NCName)) where
204 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
205 , namespaces_default = namespaces_default x
207 instance Monoid (Namespaces NCName) where
210 instance Monoid (Namespaces (Maybe NCName)) where
214 prefixifyQName :: Namespaces NCName -> QName -> PName
215 prefixifyQName Namespaces{..} QName{..} =
218 if qNameSpace == namespaces_default
220 else HM.lookup qNameSpace namespaces_prefixes
221 , pNameLocal = qNameLocal
225 -- | Non-colonized name.
226 newtype NCName = NCName { unNCName :: TL.Text }
227 deriving (Eq, Ord, Hashable)
228 instance Show NCName where
229 showsPrec _p = showString . TL.unpack . unNCName
230 instance IsString NCName where
232 fromMaybe (error $ "Invalid XML NCName: "<>show s) $
235 ncName :: TL.Text -> Maybe NCName
239 | XC.isXmlNCNameStartChar c
240 , TL.all XC.isXmlNCNameChar cs
244 poolNCNames :: [NCName]
246 [ NCName $ TL.pack ("ns"<>show i)
250 freshNCName :: HS.HashSet NCName -> NCName
251 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
253 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
254 freshifyNCName ns (NCName n) =
255 let ints = [1..] :: [Int] in
258 | suffix <- mempty : (show <$> ints)
259 , fresh <- [ NCName $ n <> TL.pack suffix]
260 , not $ fresh `HS.member` ns
266 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
267 , pNameLocal :: NCName -- ^ eg. "stylesheet"
268 } deriving (Eq, Ord, Generic)
269 instance Show PName where
270 showsPrec p PName{pNameSpace=Nothing, ..} =
271 showsPrec p pNameLocal
272 showsPrec _p PName{pNameSpace=Just p, ..} =
275 showsPrec 10 pNameLocal
276 instance IsString PName where
277 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
279 case List.break (== ':') s of
280 (_, "") -> PName Nothing $ fromString s
281 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
282 instance Hashable PName
284 pName :: NCName -> PName
285 pName = PName Nothing
291 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
292 , qNameLocal :: NCName -- ^ eg. "stylesheet"
293 } deriving (Eq, Ord, Generic)
294 instance Show QName where
295 showsPrec _p QName{..} =
296 (if TL.null $ unNamespace qNameSpace then id
297 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
298 ) . showsPrec 10 qNameLocal
299 instance IsString QName where
300 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
301 fromString full@('{':rest) =
302 case List.break (== '}') rest of
303 (_, "") -> error $ "Invalid XML Clark notation: "<>show full
304 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
305 fromString local = QName "" $ fromString local
306 instance Hashable QName
308 qName :: NCName -> QName
309 qName = QName (Namespace "")
317 } deriving (Eq, Ord, Functor)
318 instance (Show src, Show a) => Show (Sourced src a) where
319 showsPrec p Sourced{..} =
321 showsPrec 11 unSourced .
322 showString " @" . showsPrec 10 source
323 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
324 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
325 Sourced (FileRange fx bx ey :| lx) $
326 x<>fromPad (FilePos lines columns)<>y
328 lines = filePos_line by - filePos_line ex
329 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
331 -- ** Class 'FromPad'
332 class FromPad a where
333 fromPad :: FilePos -> a
334 instance FromPad T.Text where
335 fromPad FilePos{..} =
336 T.replicate filePos_line "\n" <>
337 T.replicate filePos_column " "
338 instance FromPad TL.Text where
339 fromPad FilePos{..} =
340 TL.replicate (fromIntegral filePos_line) "\n" <>
341 TL.replicate (fromIntegral filePos_column) " "
342 instance FromPad EscapedText where
343 fromPad = EscapedText . pure . fromPad
344 instance FromPad Escaped where
345 fromPad = EscapedPlain . fromPad
347 -- ** Class 'NoSource'
348 class NoSource src where
350 instance NoSource FileSource where
351 noSource = noSource :| []
352 instance NoSource FileRange where
353 noSource = FileRange "" filePos1 filePos1
355 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
356 mempty = sourced0 mempty
360 notSourced :: NoSource src => a -> Sourced src a
361 notSourced = Sourced noSource
363 -- * Type 'FileSource'
364 type FileSource = NonEmpty FileRange
366 -- ** Type 'FileRange'
369 { fileRange_file :: FilePath
370 , fileRange_begin :: FilePos
371 , fileRange_end :: FilePos
373 instance Default FileRange where
374 def = FileRange "" filePos1 filePos1
375 instance Show FileRange where
376 showsPrec _p FileRange{..} =
377 showString fileRange_file .
378 showChar '#' . showsPrec 10 fileRange_begin .
379 showChar '-' . showsPrec 10 fileRange_end
381 -- *** Type 'FilePos'
382 -- | Absolute text file position.
383 data FilePos = FilePos
384 { filePos_line :: {-# UNPACK #-} LineNum
385 , filePos_column :: {-# UNPACK #-} ColNum
387 instance Default FilePos where
389 instance Show FilePos where
390 showsPrec _p FilePos{..} =
391 showsPrec 11 filePos_line .
393 showsPrec 11 filePos_column
396 filePos1 = FilePos 1 1
398 -- **** Type 'LineNum'
401 -- **** Type 'ColNum'