1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
17 import Control.Applicative (Alternative(..))
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..))
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
48 type XML = TS.Tree (Sourced FileSource Node)
51 pattern Tree0 :: a -> TS.Tree a
52 pattern Tree0 a <- TS.Tree a (null -> True)
53 where Tree0 a = TS.Tree a Seq.empty
57 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
58 | NodeAttr QName -- ^ Node with a 'NodeText' child.
59 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
60 | NodeText Text -- ^ Leaf.
61 | NodeComment TL.Text -- ^ Leaf.
62 | NodeCDATA TL.Text -- ^ Leaf.
63 deriving (Eq, Ord, Show)
66 type Text = [TextLexeme]
68 escapeText :: TL.Text -> Text
70 case TL.span (`List.notElem` ("<>&'\""::String)) s of
71 (t, r) | TL.null t -> escape r
72 | otherwise -> TextLexemePlain t : escape r
74 escape t = case TL.uncons t of
76 Just (c, cs) -> escapeChar c : escapeText cs
79 '<' -> TextLexemeEntityRef entityRef_lt
80 '>' -> TextLexemeEntityRef entityRef_gt
81 '&' -> TextLexemeEntityRef entityRef_amp
82 '\'' -> TextLexemeEntityRef entityRef_apos
83 '"' -> TextLexemeEntityRef entityRef_quot
84 _ -> TextLexemePlain $ TL.singleton c
86 flatText :: Text -> TL.Text
87 flatText = foldMap $ \case
88 TextLexemePlain t -> t
89 TextLexemeEntityRef EntityRef{..} -> entityRef_value
90 TextLexemeCharRef (CharRef c) -> TL.singleton c
92 -- *** Type 'TextLexeme'
94 = TextLexemePlain TL.Text
95 | TextLexemeEntityRef EntityRef
96 | TextLexemeCharRef CharRef
97 deriving (Eq, Ord, Show)
99 -- *** Type 'EntityRef'
100 data EntityRef = EntityRef
101 { entityRef_name :: NCName
102 , entityRef_value :: TL.Text
103 } deriving (Eq, Ord, Show)
105 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
106 entityRef_lt = EntityRef (NCName "lt") "<"
107 entityRef_gt = EntityRef (NCName "gt") ">"
108 entityRef_amp = EntityRef (NCName "amp") "&"
109 entityRef_quot = EntityRef (NCName "quot") "\""
110 entityRef_apos = EntityRef (NCName "apos") "'"
112 -- *** Type 'CharRef'
113 newtype CharRef = CharRef Char
114 deriving (Eq, Ord, Show)
117 newtype Name = Name { unName :: TL.Text }
118 deriving (Eq, Ord, Hashable)
119 instance Show Name where
120 showsPrec _p = showString . TL.unpack . unName
121 instance IsString Name where
124 , XC.isXmlNameStartChar c
125 && all XC.isXmlNameChar cs
127 | otherwise = error "Invalid XML Name"
129 -- ** Type 'Namespace'
130 newtype Namespace = Namespace { unNamespace :: TL.Text }
131 deriving (Eq, Ord, Show, Hashable)
132 instance IsString Namespace where
134 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
135 then Namespace (fromString s)
136 else error $ "Invalid XML Namespace: " <> show s
138 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
139 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
140 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
141 xmlns_empty = Namespace ""
143 -- * Type 'Namespaces'
144 data Namespaces prefix = Namespaces
145 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
146 , namespaces_default :: Namespace
148 instance Default (Namespaces NCName) where
150 { namespaces_prefixes = HM.fromList
151 [ (xmlns_xml , "xml")
152 , (xmlns_xmlns, "xmlns")
154 , namespaces_default = ""
156 instance Default (Namespaces (Maybe NCName)) where
158 { namespaces_prefixes = HM.fromList
159 [ (xmlns_xml , Just "xml")
160 , (xmlns_xmlns, Just "xmlns")
162 , namespaces_default = ""
164 instance Semigroup (Namespaces NCName) where
166 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
167 , namespaces_default = namespaces_default x
169 instance Semigroup (Namespaces (Maybe NCName)) where
171 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
172 , namespaces_default = namespaces_default x
174 instance Monoid (Namespaces NCName) where
177 instance Monoid (Namespaces (Maybe NCName)) where
181 prefixifyQName :: Namespaces NCName -> QName -> PName
182 prefixifyQName Namespaces{..} QName{..} =
185 if qNameSpace == namespaces_default
187 else HM.lookup qNameSpace namespaces_prefixes
188 , pNameLocal = qNameLocal
192 -- | Non-colonized name.
193 newtype NCName = NCName { unNCName :: TL.Text }
194 deriving (Eq, Ord, Hashable)
195 instance Show NCName where
196 showsPrec _p = showString . TL.unpack . unNCName
197 instance IsString NCName where
200 , XC.isXmlNCNameStartChar c
201 && all XC.isXmlNCNameChar cs
203 | otherwise = error "Invalid XML NCName"
205 poolNCNames :: [NCName]
207 [ NCName $ TL.pack ("ns"<>show i)
211 freshNCName :: HS.HashSet NCName -> NCName
212 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
214 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
215 freshifyNCName ns (NCName n) =
216 let ints = [1..] :: [Int] in
219 | suffix <- mempty : (show <$> ints)
220 , fresh <- [ NCName $ n <> TL.pack suffix]
221 , not $ fresh `HS.member` ns
227 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
228 , pNameLocal :: NCName -- ^ eg. "stylesheet"
229 } deriving (Eq, Ord, Generic)
230 instance Show PName where
231 showsPrec p PName{pNameSpace=Nothing, ..} =
232 showsPrec p pNameLocal
233 showsPrec _p PName{pNameSpace=Just p, ..} =
236 showsPrec 10 pNameLocal
237 instance IsString PName where
238 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
240 case List.break (== ':') s of
241 (_, "") -> PName Nothing $ fromString s
242 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
243 instance Hashable PName
245 pName :: NCName -> PName
246 pName = PName Nothing
252 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
253 , qNameLocal :: NCName -- ^ eg. "stylesheet"
254 } deriving (Eq, Ord, Generic)
255 instance Show QName where
256 showsPrec _p QName{..} =
257 (if TL.null $ unNamespace qNameSpace then id
258 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
259 ) . showsPrec 10 qNameLocal
260 instance IsString QName where
261 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
262 fromString full@('{':rest) =
263 case List.break (== '}') rest of
264 (_, "") -> error ("Invalid XML Clark notation: " <> show full)
265 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
266 fromString local = QName "" $ fromString local
267 instance Hashable QName
269 qName :: NCName -> QName
270 qName = QName (Namespace "")
278 } deriving (Eq, Ord, Functor)
279 instance (Show src, Show a) => Show (Sourced src a) where
280 showsPrec p Sourced{..} =
282 showsPrec 11 unSourced .
283 showString " @" . showsPrec 10 source
284 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
285 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
286 Sourced (FileRange fx bx ey :| lx) $
287 x<>fromPad (FilePos lines columns)<>y
289 lines = filePos_line by - filePos_line ex
290 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
292 -- ** Class 'FromPad'
293 class FromPad a where
294 fromPad :: FilePos -> a
295 instance FromPad T.Text where
296 fromPad FilePos{..} =
297 T.replicate filePos_line "\n" <>
298 T.replicate filePos_column " "
299 instance FromPad TL.Text where
300 fromPad FilePos{..} =
301 TL.replicate (fromIntegral filePos_line) "\n" <>
302 TL.replicate (fromIntegral filePos_column) " "
304 -- ** Class 'NoSource'
305 class NoSource src where
307 instance NoSource FileSource where
308 noSource = noSource :| []
309 instance NoSource FileRange where
310 noSource = FileRange "" filePos1 filePos1
312 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
313 mempty = sourced0 mempty
317 notSourced :: NoSource src => a -> Sourced src a
318 notSourced = Sourced noSource
320 -- * Type 'FileSource'
321 type FileSource = NonEmpty FileRange
323 -- ** Type 'FileRange'
326 { fileRange_file :: FilePath
327 , fileRange_begin :: FilePos
328 , fileRange_end :: FilePos
330 instance Default FileRange where
331 def = FileRange "" filePos1 filePos1
332 instance Show FileRange where
333 showsPrec _p FileRange{..} =
334 showString fileRange_file .
335 showChar '#' . showsPrec 10 fileRange_begin .
336 showChar '-' . showsPrec 10 fileRange_end
338 -- *** Type 'FilePos'
339 -- | Absolute text file position.
340 data FilePos = FilePos
341 { filePos_line :: {-# UNPACK #-} LineNum
342 , filePos_column :: {-# UNPACK #-} ColNum
344 instance Default FilePos where
346 instance Show FilePos where
347 showsPrec _p FilePos{..} =
348 showsPrec 11 filePos_line .
350 showsPrec 11 filePos_column
353 filePos1 = FilePos 1 1
355 -- **** Type 'LineNum'
358 -- **** Type 'ColNum'