]> Git — Sourcephile - haskell/symantic-xml.git/blob - Language/Symantic/XML/Document.hs
Upgrade to megaparsec-7
[haskell/symantic-xml.git] / Language / Symantic / XML / Document.hs
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
12 , TS.Tree(..)
13 , TS.Trees
14 , TS.tree0
15 ) where
16
17 import Control.Applicative (Applicative(..), Alternative(..))
18 import Data.Bool
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
26 import Data.Int (Int)
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((+), (-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
46
47 -- * Type 'XML'
48 type XML = TS.Tree (Sourced (FileSource Offset) Node)
49 type XMLs = Seq XML
50
51 -- | Unify two 'XMLs', merging border 'NodeText's if any.
52 union :: XMLs -> XMLs -> XMLs
53 union x y =
54 case (Seq.viewr x, Seq.viewl y) of
55 (xs Seq.:> x0, y0 Seq.:< ys) ->
56 case (x0,y0) of
57 ( Tree0 (Sourced sx (NodeText tx))
58 , Tree0 (Sourced sy (NodeText ty)) ) ->
59 xs `union`
60 Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced sx tx <> Sourced sy ty) `union`
61 ys
62 _ -> x <> y
63 (Seq.EmptyR, _) -> y
64 (_, Seq.EmptyL) -> x
65
66 unions :: Foldable f => f XMLs -> XMLs
67 unions = foldl' union mempty
68
69 pattern Tree0 :: a -> TS.Tree a
70 pattern Tree0 a <- TS.Tree a (null -> True)
71 where Tree0 a = TS.Tree a Seq.empty
72
73 -- ** Type 'Node'
74 data Node
75 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
76 | NodeAttr QName -- ^ Node with a 'NodeText' child.
77 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
78 | NodeText EscapedText -- ^ Leaf.
79 | NodeComment TL.Text -- ^ Leaf.
80 | NodeCDATA TL.Text -- ^ Leaf.
81 deriving (Eq, Ord, Show)
82
83 -- ** Type 'EscapedText'
84 newtype EscapedText = EscapedText (Seq Escaped)
85 deriving (Eq, Ord, Show)
86
87 escapeText :: TL.Text -> EscapedText
88 escapeText s =
89 EscapedText $
90 case TL.span (`List.notElem` ("<>&'\""::String)) s of
91 (t, r) | TL.null t -> escape r
92 | otherwise -> EscapedPlain t Seq.<| escape r
93 where
94 escape t = case TL.uncons t of
95 Nothing -> mempty
96 Just (c, cs) -> escapeChar c Seq.<| et where EscapedText et = escapeText cs
97
98 escapeChar :: Char -> Escaped
99 escapeChar c =
100 case c of
101 '<' -> EscapedEntityRef entityRef_lt
102 '>' -> EscapedEntityRef entityRef_gt
103 '&' -> EscapedEntityRef entityRef_amp
104 '\'' -> EscapedEntityRef entityRef_apos
105 '"' -> EscapedEntityRef entityRef_quot
106 _ -> EscapedPlain $ TL.singleton c
107
108 unescapeText :: EscapedText -> TL.Text
109 unescapeText (EscapedText et) = (`foldMap` et) $ \case
110 EscapedPlain t -> t
111 EscapedEntityRef EntityRef{..} -> entityRef_value
112 EscapedCharRef (CharRef c) -> TL.singleton c
113
114 instance Semigroup EscapedText where
115 EscapedText x <> EscapedText y =
116 case (x,y) of
117 (xl Seq.:|> EscapedPlain xr, EscapedPlain yl Seq.:<|yr) ->
118 (EscapedText $ xl Seq.|> EscapedPlain (xr<>yl)) <> EscapedText yr
119 _ -> EscapedText $ x <> y
120 instance Monoid EscapedText where
121 mempty = EscapedText mempty
122 mappend = (<>)
123
124 -- *** Type 'Escaped'
125 -- | 'EscapedText' lexemes.
126 data Escaped
127 = EscapedPlain TL.Text
128 | EscapedEntityRef EntityRef
129 | EscapedCharRef CharRef
130 deriving (Eq, Ord, Show)
131
132 -- *** Type 'EntityRef'
133 data EntityRef = EntityRef
134 { entityRef_name :: NCName
135 , entityRef_value :: TL.Text
136 } deriving (Eq, Ord, Show)
137
138 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
139 entityRef_lt = EntityRef (NCName "lt") "<"
140 entityRef_gt = EntityRef (NCName "gt") ">"
141 entityRef_amp = EntityRef (NCName "amp") "&"
142 entityRef_quot = EntityRef (NCName "quot") "\""
143 entityRef_apos = EntityRef (NCName "apos") "'"
144
145 -- *** Type 'CharRef'
146 newtype CharRef = CharRef Char
147 deriving (Eq, Ord, Show)
148
149 -- ** Type 'Name'
150 newtype Name = Name { unName :: TL.Text }
151 deriving (Eq, Ord, Hashable)
152 instance Show Name where
153 showsPrec _p = showString . TL.unpack . unName
154 instance IsString Name where
155 fromString s
156 | c:cs <- s
157 , XC.isXmlNameStartChar c
158 && all XC.isXmlNameChar cs
159 = Name (TL.pack s)
160 | otherwise = error $ "Invalid XML Name: "<>show s
161
162 -- ** Type 'Namespace'
163 newtype Namespace = Namespace { unNamespace :: TL.Text }
164 deriving (Eq, Ord, Show, Hashable)
165 instance IsString Namespace where
166 fromString s =
167 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
168 then Namespace (fromString s)
169 else error $ "Invalid XML Namespace: "<>show s
170
171 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
172 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
173 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
174 xmlns_empty = Namespace ""
175
176 -- * Type 'Namespaces'
177 data Namespaces prefix = Namespaces
178 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
179 , namespaces_default :: Namespace
180 } deriving (Show)
181 instance Default (Namespaces NCName) where
182 def = Namespaces
183 { namespaces_prefixes = HM.fromList
184 [ (xmlns_xml , "xml")
185 , (xmlns_xmlns, "xmlns")
186 ]
187 , namespaces_default = ""
188 }
189 instance Default (Namespaces (Maybe NCName)) where
190 def = Namespaces
191 { namespaces_prefixes = HM.fromList
192 [ (xmlns_xml , Just "xml")
193 , (xmlns_xmlns, Just "xmlns")
194 ]
195 , namespaces_default = ""
196 }
197 instance Semigroup (Namespaces NCName) where
198 x <> y = Namespaces
199 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
200 , namespaces_default = namespaces_default x
201 }
202 instance Semigroup (Namespaces (Maybe NCName)) where
203 x <> y = Namespaces
204 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
205 , namespaces_default = namespaces_default x
206 }
207 instance Monoid (Namespaces NCName) where
208 mempty = def
209 mappend = (<>)
210 instance Monoid (Namespaces (Maybe NCName)) where
211 mempty = def
212 mappend = (<>)
213
214 prefixifyQName :: Namespaces NCName -> QName -> PName
215 prefixifyQName Namespaces{..} QName{..} =
216 PName
217 { pNameSpace =
218 if qNameSpace == namespaces_default
219 then Nothing
220 else HM.lookup qNameSpace namespaces_prefixes
221 , pNameLocal = qNameLocal
222 }
223
224 -- ** Type 'NCName'
225 -- | Non-colonized name.
226 newtype NCName = NCName { unNCName :: TL.Text }
227 deriving (Eq, Ord, Hashable)
228 instance Show NCName where
229 showsPrec _p = showString . TL.unpack . unNCName
230 instance IsString NCName where
231 fromString s =
232 fromMaybe (error $ "Invalid XML NCName: "<>show s) $
233 ncName (TL.pack s)
234
235 ncName :: TL.Text -> Maybe NCName
236 ncName t =
237 case TL.uncons t of
238 Just (c, cs)
239 | XC.isXmlNCNameStartChar c
240 , TL.all XC.isXmlNCNameChar cs
241 -> Just (NCName t)
242 _ -> Nothing
243
244 poolNCNames :: [NCName]
245 poolNCNames =
246 [ NCName $ TL.pack ("ns"<>show i)
247 | i <- [1 :: Int ..]
248 ]
249
250 freshNCName :: HS.HashSet NCName -> NCName
251 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
252
253 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
254 freshifyNCName ns (NCName n) =
255 let ints = [1..] :: [Int] in
256 List.head
257 [ fresh
258 | suffix <- mempty : (show <$> ints)
259 , fresh <- [ NCName $ n <> TL.pack suffix]
260 , not $ fresh `HS.member` ns
261 ]
262
263 -- ** Type 'PName'
264 -- | Prefixed name.
265 data PName = PName
266 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
267 , pNameLocal :: NCName -- ^ eg. "stylesheet"
268 } deriving (Eq, Ord, Generic)
269 instance Show PName where
270 showsPrec p PName{pNameSpace=Nothing, ..} =
271 showsPrec p pNameLocal
272 showsPrec _p PName{pNameSpace=Just p, ..} =
273 showsPrec 10 p .
274 showChar ':' .
275 showsPrec 10 pNameLocal
276 instance IsString PName where
277 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
278 fromString s =
279 case List.break (== ':') s of
280 (_, "") -> PName Nothing $ fromString s
281 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
282 instance Hashable PName
283
284 pName :: NCName -> PName
285 pName = PName Nothing
286 {-# INLINE pName #-}
287
288 -- ** Type 'QName'
289 -- | Qualified name.
290 data QName = QName
291 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
292 , qNameLocal :: NCName -- ^ eg. "stylesheet"
293 } deriving (Eq, Ord, Generic)
294 instance Show QName where
295 showsPrec _p QName{..} =
296 (if TL.null $ unNamespace qNameSpace then id
297 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
298 ) . showsPrec 10 qNameLocal
299 instance IsString QName where
300 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
301 fromString full@('{':rest) =
302 case List.break (== '}') rest of
303 (_, "") -> error $ "Invalid XML Clark notation: "<>show full
304 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
305 fromString local = QName "" $ fromString local
306 instance Hashable QName
307
308 qName :: NCName -> QName
309 qName = QName (Namespace "")
310 {-# INLINE qName #-}
311
312 -- * Type 'Sourced'
313 data Sourced src a
314 = Sourced
315 { source :: src
316 , unSourced :: a
317 } deriving (Eq, Ord, Functor)
318 instance (Show src, Show a) => Show (Sourced src a) where
319 showsPrec p Sourced{..} =
320 showParen (p > 10) $
321 showsPrec 11 unSourced .
322 showString " @" . showsPrec 10 source
323 instance Semigroup a => Semigroup (Sourced (FileSource Offset) a) where
324 (<>)
325 (Sourced rx@(FileRange xf xb xe :| xs) x)
326 (Sourced (FileRange yf yb ye :| _ys) y)
327 | xf == yf && xe == yb = Sourced (FileRange xf xb ye :| xs) $ x<>y
328 | otherwise = Sourced rx (x<>y)
329 {-
330 instance (FromPad a, Semigroup a) => Semigroup (Sourced (FileSource LineCol) a) where
331 (<>)
332 (Sourced rx@(FileRange xf xb xe :| xs) x)
333 (Sourced (FileRange yf yb ye :| _ys) y)
334 | xf == yf = Sourced (FileRange xf xb ye :| xs) $ x<>fromPad (LineColumn l c)<>y
335 | otherwise = Sourced rx (x<>y)
336 where
337 l = lineNum yb - lineNum xe
338 c = colNum yb - colNum (if l <= 0 then xe else xb)
339
340 -- ** Class 'FromPad'
341 class FromPad a where
342 fromPad :: LineColumn -> a
343 instance FromPad T.Text where
344 fromPad LineColumn{..} =
345 T.replicate lineNum "\n" <>
346 T.replicate colNum " "
347 instance FromPad TL.Text where
348 fromPad LineColumn{..} =
349 TL.replicate (fromIntegral lineNum) "\n" <>
350 TL.replicate (fromIntegral colNum) " "
351 instance FromPad EscapedText where
352 fromPad = EscapedText . pure . fromPad
353 instance FromPad Escaped where
354 fromPad = EscapedPlain . fromPad
355 -}
356
357 -- ** Class 'NoSource'
358 class NoSource src where
359 noSource :: src
360 instance NoSource pos => NoSource (FileSource pos) where
361 noSource = noSource :| []
362 instance NoSource pos => NoSource (FileRange pos) where
363 noSource = FileRange "" noSource noSource
364 instance NoSource Offset where
365 noSource = Offset def
366 instance NoSource LineColumn where
367 noSource = LineColumn def def
368 {-
369 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
370 mempty = sourced0 mempty
371 mappend = (<>)
372 -}
373
374 notSourced :: NoSource src => a -> Sourced src a
375 notSourced = Sourced noSource
376
377 -- * Type 'FileSource'
378 type FileSource pos = NonEmpty (FileRange pos)
379
380 -- ** Type 'FileSourced'
381 type FileSourced = Sourced (FileSource Offset)
382
383 -- ** Type 'FileRange'
384 data FileRange pos
385 = FileRange
386 { fileRange_file :: FilePath
387 , fileRange_begin :: pos
388 , fileRange_end :: pos
389 } deriving (Eq, Ord)
390 instance Default (FileRange Offset) where
391 def = FileRange "" def def
392 instance Default (FileRange LineColumn) where
393 def = FileRange "" def def
394 instance Show (FileRange Offset) where
395 showsPrec _p FileRange{..} =
396 showString fileRange_file .
397 showChar '@' . showsPrec 10 fileRange_begin .
398 showChar '-' . showsPrec 10 fileRange_end
399 instance Show (FileRange LineColumn) where
400 showsPrec _p FileRange{..} =
401 showString fileRange_file .
402 showChar '#' . showsPrec 10 fileRange_begin .
403 showChar '-' . showsPrec 10 fileRange_end
404
405 -- *** Type 'Offset'
406 newtype Offset = Offset Int
407 deriving (Eq, Ord)
408 instance Show Offset where
409 showsPrec p (Offset o) = showsPrec p o
410 instance Default Offset where
411 def = Offset 0
412 instance Semigroup Offset where
413 Offset x <> Offset y = Offset (x+y)
414 instance Monoid Offset where
415 mempty = def
416 mappend = (<>)
417
418 -- *** Type 'LineColumn'
419 -- | Absolute text file position.
420 data LineColumn = LineColumn
421 { lineNum :: {-# UNPACK #-} Offset
422 , colNum :: {-# UNPACK #-} Offset
423 } deriving (Eq, Ord)
424 instance Default LineColumn where
425 def = LineColumn def def
426 instance Show LineColumn where
427 showsPrec _p LineColumn{..} =
428 showsPrec 11 lineNum .
429 showChar ':' .
430 showsPrec 11 colNum
431
432 filePos1 :: LineColumn
433 filePos1 = def