]> Git — Sourcephile - haskell/symantic-xml.git/blob - Symantic/XML/Document.hs
562dc55d2e0e6032022bf62cf5eed634833c3771
[haskell/symantic-xml.git] / Symantic / XML / Document.hs
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Symantic.XML.Document
11 ( module Symantic.XML.Document
12 , TS.Tree(..)
13 , TS.Trees
14 , TS.tree0
15 ) where
16
17 import Control.Applicative (Alternative(..))
18 import Data.Bool
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
26 import Data.Int (Int)
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((+), error)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text.Lazy as TL
44 import qualified Data.TreeSeq.Strict as TS
45
46 -- * Type 'XML'
47 type XML src = TS.Tree (Sourced src Node)
48 type XMLs src = Seq (XML src)
49
50 -- | Unify two 'XMLs', merging border 'NodeText's if any.
51 union :: Semigroup (Sourced src EscapedText) => XMLs src -> XMLs src -> XMLs src
52 union x y =
53 case (Seq.viewr x, Seq.viewl y) of
54 (xs Seq.:> x0, y0 Seq.:< ys) ->
55 case (x0,y0) of
56 ( Tree0 (Sourced sx (NodeText tx))
57 , Tree0 (Sourced sy (NodeText ty)) ) ->
58 xs `union`
59 Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced sx tx <> Sourced sy ty) `union`
60 ys
61 _ -> x <> y
62 (Seq.EmptyR, _) -> y
63 (_, Seq.EmptyL) -> x
64
65 unions ::
66 Semigroup (Sourced src EscapedText) =>
67 Foldable f => f (XMLs src) -> XMLs src
68 unions = foldl' union mempty
69
70 pattern Tree0 :: a -> TS.Tree a
71 pattern Tree0 a <- TS.Tree a (null -> True)
72 where Tree0 a = TS.Tree a Seq.empty
73
74 -- ** Type 'Node'
75 data Node
76 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
77 | NodeAttr QName -- ^ Node with a 'NodeText' child.
78 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
79 | NodeText EscapedText -- ^ Leaf.
80 | NodeComment TL.Text -- ^ Leaf.
81 | NodeCDATA TL.Text -- ^ Leaf.
82 deriving (Eq, Ord, Show)
83
84 -- ** Type 'EscapedText'
85 newtype EscapedText = EscapedText (Seq Escaped)
86 deriving (Eq, Ord, Show)
87
88 escapeText :: TL.Text -> EscapedText
89 escapeText s =
90 EscapedText $
91 case TL.span (`List.notElem` ("<>&'\""::String)) s of
92 (t, r) | TL.null t -> escape r
93 | otherwise -> EscapedPlain t Seq.<| escape r
94 where
95 escape t = case TL.uncons t of
96 Nothing -> mempty
97 Just (c, cs) -> escapeChar c Seq.<| et where EscapedText et = escapeText cs
98
99 escapeChar :: Char -> Escaped
100 escapeChar c =
101 case c of
102 '<' -> EscapedEntityRef entityRef_lt
103 '>' -> EscapedEntityRef entityRef_gt
104 '&' -> EscapedEntityRef entityRef_amp
105 '\'' -> EscapedEntityRef entityRef_apos
106 '"' -> EscapedEntityRef entityRef_quot
107 _ -> EscapedPlain $ TL.singleton c
108
109 unescapeText :: EscapedText -> TL.Text
110 unescapeText (EscapedText et) = (`foldMap` et) $ \case
111 EscapedPlain t -> t
112 EscapedEntityRef EntityRef{..} -> entityRef_value
113 EscapedCharRef (CharRef c) -> TL.singleton c
114
115 instance Semigroup EscapedText where
116 EscapedText x <> EscapedText y =
117 case (x,y) of
118 (xl Seq.:|> EscapedPlain xr, EscapedPlain yl Seq.:<|yr) ->
119 (EscapedText $ xl Seq.|> EscapedPlain (xr<>yl)) <> EscapedText yr
120 _ -> EscapedText $ x <> y
121 instance Monoid EscapedText where
122 mempty = EscapedText mempty
123 mappend = (<>)
124
125 -- *** Type 'Escaped'
126 -- | 'EscapedText' lexemes.
127 data Escaped
128 = EscapedPlain TL.Text
129 | EscapedEntityRef EntityRef
130 | EscapedCharRef CharRef
131 deriving (Eq, Ord, Show)
132
133 -- *** Type 'EntityRef'
134 data EntityRef = EntityRef
135 { entityRef_name :: NCName
136 , entityRef_value :: TL.Text
137 } deriving (Eq, Ord, Show)
138
139 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
140 entityRef_lt = EntityRef (NCName "lt") "<"
141 entityRef_gt = EntityRef (NCName "gt") ">"
142 entityRef_amp = EntityRef (NCName "amp") "&"
143 entityRef_quot = EntityRef (NCName "quot") "\""
144 entityRef_apos = EntityRef (NCName "apos") "'"
145
146 -- *** Type 'CharRef'
147 newtype CharRef = CharRef Char
148 deriving (Eq, Ord, Show)
149
150 -- ** Type 'Name'
151 newtype Name = Name { unName :: TL.Text }
152 deriving (Eq, Ord, Hashable)
153 instance Show Name where
154 showsPrec _p = showString . TL.unpack . unName
155 instance IsString Name where
156 fromString s
157 | c:cs <- s
158 , XC.isXmlNameStartChar c
159 && all XC.isXmlNameChar cs
160 = Name (TL.pack s)
161 | otherwise = error $ "Invalid XML Name: "<>show s
162
163 -- ** Type 'Namespace'
164 newtype Namespace = Namespace { unNamespace :: TL.Text }
165 deriving (Eq, Ord, Show, Hashable)
166 instance IsString Namespace where
167 fromString s =
168 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
169 then Namespace (fromString s)
170 else error $ "Invalid XML Namespace: "<>show s
171
172 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
173 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
174 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
175 xmlns_empty = Namespace ""
176
177 -- * Type 'Namespaces'
178 data Namespaces prefix = Namespaces
179 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
180 , namespaces_default :: Namespace
181 } deriving (Show)
182 instance Default (Namespaces NCName) where
183 def = Namespaces
184 { namespaces_prefixes = HM.fromList
185 [ (xmlns_xml , "xml")
186 , (xmlns_xmlns, "xmlns")
187 ]
188 , namespaces_default = ""
189 }
190 instance Default (Namespaces (Maybe NCName)) where
191 def = Namespaces
192 { namespaces_prefixes = HM.fromList
193 [ (xmlns_xml , Just "xml")
194 , (xmlns_xmlns, Just "xmlns")
195 ]
196 , namespaces_default = ""
197 }
198 instance Semigroup (Namespaces NCName) where
199 x <> y = Namespaces
200 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
201 , namespaces_default = namespaces_default x
202 }
203 instance Semigroup (Namespaces (Maybe NCName)) where
204 x <> y = Namespaces
205 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
206 , namespaces_default = namespaces_default x
207 }
208 instance Monoid (Namespaces NCName) where
209 mempty = def
210 mappend = (<>)
211 instance Monoid (Namespaces (Maybe NCName)) where
212 mempty = def
213 mappend = (<>)
214
215 prefixifyQName :: Namespaces NCName -> QName -> PName
216 prefixifyQName Namespaces{..} QName{..} =
217 PName
218 { pNameSpace =
219 if qNameSpace == namespaces_default
220 then Nothing
221 else HM.lookup qNameSpace namespaces_prefixes
222 , pNameLocal = qNameLocal
223 }
224
225 -- ** Type 'NCName'
226 -- | Non-colonized name.
227 newtype NCName = NCName { unNCName :: TL.Text }
228 deriving (Eq, Ord, Hashable)
229 instance Show NCName where
230 showsPrec _p = showString . TL.unpack . unNCName
231 instance IsString NCName where
232 fromString s =
233 fromMaybe (error $ "Invalid XML NCName: "<>show s) $
234 ncName (TL.pack s)
235
236 ncName :: TL.Text -> Maybe NCName
237 ncName t =
238 case TL.uncons t of
239 Just (c, cs)
240 | XC.isXmlNCNameStartChar c
241 , TL.all XC.isXmlNCNameChar cs
242 -> Just (NCName t)
243 _ -> Nothing
244
245 poolNCNames :: [NCName]
246 poolNCNames =
247 [ NCName $ TL.pack ("ns"<>show i)
248 | i <- [1 :: Int ..]
249 ]
250
251 freshNCName :: HS.HashSet NCName -> NCName
252 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
253
254 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
255 freshifyNCName ns (NCName n) =
256 let ints = [1..] :: [Int] in
257 List.head
258 [ fresh
259 | suffix <- mempty : (show <$> ints)
260 , fresh <- [ NCName $ n <> TL.pack suffix]
261 , not $ fresh `HS.member` ns
262 ]
263
264 -- ** Type 'PName'
265 -- | Prefixed name.
266 data PName = PName
267 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
268 , pNameLocal :: NCName -- ^ eg. "stylesheet"
269 } deriving (Eq, Ord, Generic)
270 instance Show PName where
271 showsPrec p PName{pNameSpace=Nothing, ..} =
272 showsPrec p pNameLocal
273 showsPrec _p PName{pNameSpace=Just p, ..} =
274 showsPrec 10 p .
275 showChar ':' .
276 showsPrec 10 pNameLocal
277 instance IsString PName where
278 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
279 fromString s =
280 case List.break (== ':') s of
281 (_, "") -> PName Nothing $ fromString s
282 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
283 instance Hashable PName
284
285 pName :: NCName -> PName
286 pName = PName Nothing
287 {-# INLINE pName #-}
288
289 -- ** Type 'QName'
290 -- | Qualified name.
291 data QName = QName
292 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
293 , qNameLocal :: NCName -- ^ eg. "stylesheet"
294 } deriving (Eq, Ord, Generic)
295 instance Show QName where
296 showsPrec _p QName{..} =
297 (if TL.null $ unNamespace qNameSpace then id
298 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
299 ) . showsPrec 10 qNameLocal
300 instance IsString QName where
301 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
302 fromString full@('{':rest) =
303 case List.break (== '}') rest of
304 (_, "") -> error $ "Invalid XML Clark notation: "<>show full
305 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
306 fromString local = QName "" $ fromString local
307 instance Hashable QName
308
309 qName :: NCName -> QName
310 qName = QName (Namespace "")
311 {-# INLINE qName #-}
312
313 -- * Type 'Sourced'
314 data Sourced src a
315 = Sourced
316 { source :: src
317 , unSourced :: a
318 } deriving (Eq, Ord, Functor)
319 instance (Show src, Show a) => Show (Sourced src a) where
320 showsPrec p Sourced{..} =
321 showParen (p > 10) $
322 showsPrec 11 unSourced .
323 showString " @" . showsPrec 10 source
324 instance Semigroup a => Semigroup (Sourced (FileSource Offset) a) where
325 (<>)
326 (Sourced rx@(FileRange xf xb xe :| xs) x)
327 (Sourced (FileRange yf yb ye :| _ys) y)
328 | xf == yf && xe == yb = Sourced (FileRange xf xb ye :| xs) $ x<>y
329 | otherwise = Sourced rx (x<>y)
330 {-
331 instance (FromPad a, Semigroup a) => Semigroup (Sourced (FileSource LineCol) a) where
332 (<>)
333 (Sourced rx@(FileRange xf xb xe :| xs) x)
334 (Sourced (FileRange yf yb ye :| _ys) y)
335 | xf == yf = Sourced (FileRange xf xb ye :| xs) $ x<>fromPad (LineColumn l c)<>y
336 | otherwise = Sourced rx (x<>y)
337 where
338 l = lineNum yb - lineNum xe
339 c = colNum yb - colNum (if l <= 0 then xe else xb)
340
341 -- ** Class 'FromPad'
342 class FromPad a where
343 fromPad :: LineColumn -> a
344 instance FromPad T.Text where
345 fromPad LineColumn{..} =
346 T.replicate lineNum "\n" <>
347 T.replicate colNum " "
348 instance FromPad TL.Text where
349 fromPad LineColumn{..} =
350 TL.replicate (fromIntegral lineNum) "\n" <>
351 TL.replicate (fromIntegral colNum) " "
352 instance FromPad EscapedText where
353 fromPad = EscapedText . pure . fromPad
354 instance FromPad Escaped where
355 fromPad = EscapedPlain . fromPad
356 -}
357
358 -- ** Class 'NoSource'
359 class NoSource src where
360 noSource :: src
361 instance Default pos => NoSource (FileSource pos) where
362 noSource = noSource :| []
363 instance Default pos => NoSource (FileRange pos) where
364 noSource = FileRange "" def def
365 instance NoSource Offset where
366 noSource = Offset def
367 {-
368 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
369 mempty = sourced0 mempty
370 mappend = (<>)
371 -}
372
373 notSourced :: NoSource src => a -> Sourced src a
374 notSourced = Sourced noSource
375
376 -- * Type 'FileSource'
377 type FileSource pos = NonEmpty (FileRange pos)
378
379 -- ** Type 'FileSourced'
380 type FileSourced = Sourced (FileSource Offset)
381
382 -- ** Type 'FileRange'
383 data FileRange pos
384 = FileRange
385 { fileRange_file :: FilePath
386 , fileRange_begin :: pos
387 , fileRange_end :: pos
388 } deriving (Eq, Ord)
389 instance Default (FileRange Offset) where
390 def = FileRange "" def def
391 instance Default (FileRange LineColumn) where
392 def = FileRange "" def def
393 instance Show (FileRange Offset) where
394 showsPrec _p FileRange{..} =
395 showString fileRange_file .
396 showChar '@' . showsPrec 10 fileRange_begin .
397 showChar '-' . showsPrec 10 fileRange_end
398 instance Show (FileRange LineColumn) where
399 showsPrec _p FileRange{..} =
400 showString fileRange_file .
401 showChar '#' . showsPrec 10 fileRange_begin .
402 showChar '-' . showsPrec 10 fileRange_end
403
404 -- *** Type 'Offset'
405 newtype Offset = Offset Int
406 deriving (Eq, Ord)
407 instance Show Offset where
408 showsPrec p (Offset o) = showsPrec p o
409 instance Default Offset where
410 def = Offset 0
411 instance Semigroup Offset where
412 Offset x <> Offset y = Offset (x+y)
413 instance Monoid Offset where
414 mempty = def
415 mappend = (<>)
416
417 -- *** Type 'LineColumn'
418 -- | Absolute text file position.
419 data LineColumn = LineColumn
420 { lineNum :: {-# UNPACK #-} Offset
421 , colNum :: {-# UNPACK #-} Offset
422 } deriving (Eq, Ord)
423 instance Default LineColumn where
424 def = LineColumn def def
425 instance Show LineColumn where
426 showsPrec _p LineColumn{..} =
427 showsPrec 11 lineNum .
428 showChar ':' .
429 showsPrec 11 colNum
430
431 filePos1 :: LineColumn
432 filePos1 = def