1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
17 import Control.Applicative (Applicative(..), Alternative(..))
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
48 type XML = TS.Tree (Sourced FileSource Node)
51 -- | Unify two 'XMLs', merging border 'NodeText's if any.
52 union :: XMLs -> XMLs -> XMLs
54 case (Seq.viewr x, Seq.viewl y) of
55 (xs Seq.:> x0, y0 Seq.:< ys) ->
57 ( Tree0 (Sourced ssx@(FileRange {fileRange_file=fx}:|_sx) (NodeText tx))
58 , Tree0 (Sourced ssy@(FileRange {fileRange_file=fy}:|_sy) (NodeText ty)) ) | fx == fy ->
60 Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced ssx tx <> Sourced ssy ty) `union`
66 unions :: Foldable f => f XMLs -> XMLs
67 unions = foldl' union mempty
69 pattern Tree0 :: a -> TS.Tree a
70 pattern Tree0 a <- TS.Tree a (null -> True)
71 where Tree0 a = TS.Tree a Seq.empty
75 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
76 | NodeAttr QName -- ^ Node with a 'NodeText' child.
77 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
78 | NodeText EscapedText -- ^ Leaf.
79 | NodeComment TL.Text -- ^ Leaf.
80 | NodeCDATA TL.Text -- ^ Leaf.
81 deriving (Eq, Ord, Show)
83 -- ** Type 'EscapedText'
84 type EscapedText = [Escaped]
86 escapeText :: TL.Text -> EscapedText
88 case TL.span (`List.notElem` ("<>&'\""::String)) s of
89 (t, r) | TL.null t -> escape r
90 | otherwise -> EscapedPlain t : escape r
92 escape t = case TL.uncons t of
94 Just (c, cs) -> escapeChar c : escapeText cs
96 escapeChar :: Char -> Escaped
99 '<' -> EscapedEntityRef entityRef_lt
100 '>' -> EscapedEntityRef entityRef_gt
101 '&' -> EscapedEntityRef entityRef_amp
102 '\'' -> EscapedEntityRef entityRef_apos
103 '"' -> EscapedEntityRef entityRef_quot
104 _ -> EscapedPlain $ TL.singleton c
106 unescapeText :: EscapedText -> TL.Text
107 unescapeText = foldMap $ \case
109 EscapedEntityRef EntityRef{..} -> entityRef_value
110 EscapedCharRef (CharRef c) -> TL.singleton c
112 -- *** Type 'Escaped'
113 -- | 'EscapedText' lexemes.
115 = EscapedPlain TL.Text
116 | EscapedEntityRef EntityRef
117 | EscapedCharRef CharRef
118 deriving (Eq, Ord, Show)
120 -- *** Type 'EntityRef'
121 data EntityRef = EntityRef
122 { entityRef_name :: NCName
123 , entityRef_value :: TL.Text
124 } deriving (Eq, Ord, Show)
126 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
127 entityRef_lt = EntityRef (NCName "lt") "<"
128 entityRef_gt = EntityRef (NCName "gt") ">"
129 entityRef_amp = EntityRef (NCName "amp") "&"
130 entityRef_quot = EntityRef (NCName "quot") "\""
131 entityRef_apos = EntityRef (NCName "apos") "'"
133 -- *** Type 'CharRef'
134 newtype CharRef = CharRef Char
135 deriving (Eq, Ord, Show)
138 newtype Name = Name { unName :: TL.Text }
139 deriving (Eq, Ord, Hashable)
140 instance Show Name where
141 showsPrec _p = showString . TL.unpack . unName
142 instance IsString Name where
145 , XC.isXmlNameStartChar c
146 && all XC.isXmlNameChar cs
148 | otherwise = error $ "Invalid XML Name: "<>show s
150 -- ** Type 'Namespace'
151 newtype Namespace = Namespace { unNamespace :: TL.Text }
152 deriving (Eq, Ord, Show, Hashable)
153 instance IsString Namespace where
155 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
156 then Namespace (fromString s)
157 else error $ "Invalid XML Namespace: "<>show s
159 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
160 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
161 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
162 xmlns_empty = Namespace ""
164 -- * Type 'Namespaces'
165 data Namespaces prefix = Namespaces
166 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
167 , namespaces_default :: Namespace
169 instance Default (Namespaces NCName) where
171 { namespaces_prefixes = HM.fromList
172 [ (xmlns_xml , "xml")
173 , (xmlns_xmlns, "xmlns")
175 , namespaces_default = ""
177 instance Default (Namespaces (Maybe NCName)) where
179 { namespaces_prefixes = HM.fromList
180 [ (xmlns_xml , Just "xml")
181 , (xmlns_xmlns, Just "xmlns")
183 , namespaces_default = ""
185 instance Semigroup (Namespaces NCName) where
187 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
188 , namespaces_default = namespaces_default x
190 instance Semigroup (Namespaces (Maybe NCName)) where
192 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
193 , namespaces_default = namespaces_default x
195 instance Monoid (Namespaces NCName) where
198 instance Monoid (Namespaces (Maybe NCName)) where
202 prefixifyQName :: Namespaces NCName -> QName -> PName
203 prefixifyQName Namespaces{..} QName{..} =
206 if qNameSpace == namespaces_default
208 else HM.lookup qNameSpace namespaces_prefixes
209 , pNameLocal = qNameLocal
213 -- | Non-colonized name.
214 newtype NCName = NCName { unNCName :: TL.Text }
215 deriving (Eq, Ord, Hashable)
216 instance Show NCName where
217 showsPrec _p = showString . TL.unpack . unNCName
218 instance IsString NCName where
220 fromMaybe (error $ "Invalid XML NCName: "<>show s) $
223 ncName :: TL.Text -> Maybe NCName
227 | XC.isXmlNCNameStartChar c
228 , TL.all XC.isXmlNCNameChar cs
232 poolNCNames :: [NCName]
234 [ NCName $ TL.pack ("ns"<>show i)
238 freshNCName :: HS.HashSet NCName -> NCName
239 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
241 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
242 freshifyNCName ns (NCName n) =
243 let ints = [1..] :: [Int] in
246 | suffix <- mempty : (show <$> ints)
247 , fresh <- [ NCName $ n <> TL.pack suffix]
248 , not $ fresh `HS.member` ns
254 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
255 , pNameLocal :: NCName -- ^ eg. "stylesheet"
256 } deriving (Eq, Ord, Generic)
257 instance Show PName where
258 showsPrec p PName{pNameSpace=Nothing, ..} =
259 showsPrec p pNameLocal
260 showsPrec _p PName{pNameSpace=Just p, ..} =
263 showsPrec 10 pNameLocal
264 instance IsString PName where
265 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
267 case List.break (== ':') s of
268 (_, "") -> PName Nothing $ fromString s
269 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
270 instance Hashable PName
272 pName :: NCName -> PName
273 pName = PName Nothing
279 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
280 , qNameLocal :: NCName -- ^ eg. "stylesheet"
281 } deriving (Eq, Ord, Generic)
282 instance Show QName where
283 showsPrec _p QName{..} =
284 (if TL.null $ unNamespace qNameSpace then id
285 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
286 ) . showsPrec 10 qNameLocal
287 instance IsString QName where
288 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
289 fromString full@('{':rest) =
290 case List.break (== '}') rest of
291 (_, "") -> error $ "Invalid XML Clark notation: "<>show full
292 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
293 fromString local = QName "" $ fromString local
294 instance Hashable QName
296 qName :: NCName -> QName
297 qName = QName (Namespace "")
305 } deriving (Eq, Ord, Functor)
306 instance (Show src, Show a) => Show (Sourced src a) where
307 showsPrec p Sourced{..} =
309 showsPrec 11 unSourced .
310 showString " @" . showsPrec 10 source
311 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
312 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
313 Sourced (FileRange fx bx ey :| lx) $
314 x<>fromPad (FilePos lines columns)<>y
316 lines = filePos_line by - filePos_line ex
317 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
319 -- ** Class 'FromPad'
320 class FromPad a where
321 fromPad :: FilePos -> a
322 instance FromPad T.Text where
323 fromPad FilePos{..} =
324 T.replicate filePos_line "\n" <>
325 T.replicate filePos_column " "
326 instance FromPad TL.Text where
327 fromPad FilePos{..} =
328 TL.replicate (fromIntegral filePos_line) "\n" <>
329 TL.replicate (fromIntegral filePos_column) " "
330 instance FromPad EscapedText where
331 fromPad = pure . fromPad
332 instance FromPad Escaped where
333 fromPad = EscapedPlain . fromPad
335 -- ** Class 'NoSource'
336 class NoSource src where
338 instance NoSource FileSource where
339 noSource = noSource :| []
340 instance NoSource FileRange where
341 noSource = FileRange "" filePos1 filePos1
343 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
344 mempty = sourced0 mempty
348 notSourced :: NoSource src => a -> Sourced src a
349 notSourced = Sourced noSource
351 -- * Type 'FileSource'
352 type FileSource = NonEmpty FileRange
354 -- ** Type 'FileRange'
357 { fileRange_file :: FilePath
358 , fileRange_begin :: FilePos
359 , fileRange_end :: FilePos
361 instance Default FileRange where
362 def = FileRange "" filePos1 filePos1
363 instance Show FileRange where
364 showsPrec _p FileRange{..} =
365 showString fileRange_file .
366 showChar '#' . showsPrec 10 fileRange_begin .
367 showChar '-' . showsPrec 10 fileRange_end
369 -- *** Type 'FilePos'
370 -- | Absolute text file position.
371 data FilePos = FilePos
372 { filePos_line :: {-# UNPACK #-} LineNum
373 , filePos_column :: {-# UNPACK #-} ColNum
375 instance Default FilePos where
377 instance Show FilePos where
378 showsPrec _p FilePos{..} =
379 showsPrec 11 filePos_line .
381 showsPrec 11 filePos_column
384 filePos1 = FilePos 1 1
386 -- **** Type 'LineNum'
389 -- **** Type 'ColNum'