]> Git — Sourcephile - haskell/symantic-xml.git/blob - Language/Symantic/XML/Document.hs
RNC: fix empty text in interleaved alternatives
[haskell/symantic-xml.git] / Language / Symantic / XML / Document.hs
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
12 , TS.Tree(..)
13 , TS.Trees
14 , TS.tree0
15 ) where
16
17 import Control.Applicative (Applicative(..), Alternative(..))
18 import Data.Bool
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
26 import Data.Int (Int)
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..), fromMaybe)
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
46
47 -- * Type 'XML'
48 type XML = TS.Tree (Sourced FileSource Node)
49 type XMLs = Seq XML
50
51 -- | Unify two 'XMLs', merging border 'NodeText's if any.
52 union :: XMLs -> XMLs -> XMLs
53 union x y =
54 case (Seq.viewr x, Seq.viewl y) of
55 (xs Seq.:> x0, y0 Seq.:< ys) ->
56 case (x0,y0) of
57 ( Tree0 (Sourced ssx@(FileRange {fileRange_file=fx}:|_sx) (NodeText tx))
58 , Tree0 (Sourced ssy@(FileRange {fileRange_file=fy}:|_sy) (NodeText ty)) ) | fx == fy ->
59 xs `union`
60 Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced ssx tx <> Sourced ssy ty) `union`
61 ys
62 _ -> x <> y
63 (Seq.EmptyR, _) -> y
64 (_, Seq.EmptyL) -> x
65
66 unions :: Foldable f => f XMLs -> XMLs
67 unions = foldl' union mempty
68
69 pattern Tree0 :: a -> TS.Tree a
70 pattern Tree0 a <- TS.Tree a (null -> True)
71 where Tree0 a = TS.Tree a Seq.empty
72
73 -- ** Type 'Node'
74 data Node
75 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
76 | NodeAttr QName -- ^ Node with a 'NodeText' child.
77 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
78 | NodeText EscapedText -- ^ Leaf.
79 | NodeComment TL.Text -- ^ Leaf.
80 | NodeCDATA TL.Text -- ^ Leaf.
81 deriving (Eq, Ord, Show)
82
83 -- ** Type 'EscapedText'
84 type EscapedText = [Escaped]
85
86 escapeText :: TL.Text -> EscapedText
87 escapeText s =
88 case TL.span (`List.notElem` ("<>&'\""::String)) s of
89 (t, r) | TL.null t -> escape r
90 | otherwise -> EscapedPlain t : escape r
91 where
92 escape t = case TL.uncons t of
93 Nothing -> []
94 Just (c, cs) -> escapeChar c : escapeText cs
95
96 escapeChar :: Char -> Escaped
97 escapeChar c =
98 case c of
99 '<' -> EscapedEntityRef entityRef_lt
100 '>' -> EscapedEntityRef entityRef_gt
101 '&' -> EscapedEntityRef entityRef_amp
102 '\'' -> EscapedEntityRef entityRef_apos
103 '"' -> EscapedEntityRef entityRef_quot
104 _ -> EscapedPlain $ TL.singleton c
105
106 unescapeText :: EscapedText -> TL.Text
107 unescapeText = foldMap $ \case
108 EscapedPlain t -> t
109 EscapedEntityRef EntityRef{..} -> entityRef_value
110 EscapedCharRef (CharRef c) -> TL.singleton c
111
112 -- *** Type 'Escaped'
113 -- | 'EscapedText' lexemes.
114 data Escaped
115 = EscapedPlain TL.Text
116 | EscapedEntityRef EntityRef
117 | EscapedCharRef CharRef
118 deriving (Eq, Ord, Show)
119
120 -- *** Type 'EntityRef'
121 data EntityRef = EntityRef
122 { entityRef_name :: NCName
123 , entityRef_value :: TL.Text
124 } deriving (Eq, Ord, Show)
125
126 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
127 entityRef_lt = EntityRef (NCName "lt") "<"
128 entityRef_gt = EntityRef (NCName "gt") ">"
129 entityRef_amp = EntityRef (NCName "amp") "&"
130 entityRef_quot = EntityRef (NCName "quot") "\""
131 entityRef_apos = EntityRef (NCName "apos") "'"
132
133 -- *** Type 'CharRef'
134 newtype CharRef = CharRef Char
135 deriving (Eq, Ord, Show)
136
137 -- ** Type 'Name'
138 newtype Name = Name { unName :: TL.Text }
139 deriving (Eq, Ord, Hashable)
140 instance Show Name where
141 showsPrec _p = showString . TL.unpack . unName
142 instance IsString Name where
143 fromString s
144 | c:cs <- s
145 , XC.isXmlNameStartChar c
146 && all XC.isXmlNameChar cs
147 = Name (TL.pack s)
148 | otherwise = error "Invalid XML Name"
149
150 -- ** Type 'Namespace'
151 newtype Namespace = Namespace { unNamespace :: TL.Text }
152 deriving (Eq, Ord, Show, Hashable)
153 instance IsString Namespace where
154 fromString s =
155 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
156 then Namespace (fromString s)
157 else error $ "Invalid XML Namespace: " <> show s
158
159 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
160 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
161 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
162 xmlns_empty = Namespace ""
163
164 -- * Type 'Namespaces'
165 data Namespaces prefix = Namespaces
166 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
167 , namespaces_default :: Namespace
168 } deriving (Show)
169 instance Default (Namespaces NCName) where
170 def = Namespaces
171 { namespaces_prefixes = HM.fromList
172 [ (xmlns_xml , "xml")
173 , (xmlns_xmlns, "xmlns")
174 ]
175 , namespaces_default = ""
176 }
177 instance Default (Namespaces (Maybe NCName)) where
178 def = Namespaces
179 { namespaces_prefixes = HM.fromList
180 [ (xmlns_xml , Just "xml")
181 , (xmlns_xmlns, Just "xmlns")
182 ]
183 , namespaces_default = ""
184 }
185 instance Semigroup (Namespaces NCName) where
186 x <> y = Namespaces
187 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
188 , namespaces_default = namespaces_default x
189 }
190 instance Semigroup (Namespaces (Maybe NCName)) where
191 x <> y = Namespaces
192 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
193 , namespaces_default = namespaces_default x
194 }
195 instance Monoid (Namespaces NCName) where
196 mempty = def
197 mappend = (<>)
198 instance Monoid (Namespaces (Maybe NCName)) where
199 mempty = def
200 mappend = (<>)
201
202 prefixifyQName :: Namespaces NCName -> QName -> PName
203 prefixifyQName Namespaces{..} QName{..} =
204 PName
205 { pNameSpace =
206 if qNameSpace == namespaces_default
207 then Nothing
208 else HM.lookup qNameSpace namespaces_prefixes
209 , pNameLocal = qNameLocal
210 }
211
212 -- ** Type 'NCName'
213 -- | Non-colonized name.
214 newtype NCName = NCName { unNCName :: TL.Text }
215 deriving (Eq, Ord, Hashable)
216 instance Show NCName where
217 showsPrec _p = showString . TL.unpack . unNCName
218 instance IsString NCName where
219 fromString s =
220 fromMaybe (error "Invalid XML NCName") $
221 ncName (TL.pack s)
222
223 ncName :: TL.Text -> Maybe NCName
224 ncName t =
225 case TL.uncons t of
226 Just (c, cs)
227 | XC.isXmlNCNameStartChar c
228 , TL.all XC.isXmlNCNameChar cs
229 -> Just (NCName t)
230 _ -> Nothing
231
232 poolNCNames :: [NCName]
233 poolNCNames =
234 [ NCName $ TL.pack ("ns"<>show i)
235 | i <- [1 :: Int ..]
236 ]
237
238 freshNCName :: HS.HashSet NCName -> NCName
239 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
240
241 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
242 freshifyNCName ns (NCName n) =
243 let ints = [1..] :: [Int] in
244 List.head
245 [ fresh
246 | suffix <- mempty : (show <$> ints)
247 , fresh <- [ NCName $ n <> TL.pack suffix]
248 , not $ fresh `HS.member` ns
249 ]
250
251 -- ** Type 'PName'
252 -- | Prefixed name.
253 data PName = PName
254 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
255 , pNameLocal :: NCName -- ^ eg. "stylesheet"
256 } deriving (Eq, Ord, Generic)
257 instance Show PName where
258 showsPrec p PName{pNameSpace=Nothing, ..} =
259 showsPrec p pNameLocal
260 showsPrec _p PName{pNameSpace=Just p, ..} =
261 showsPrec 10 p .
262 showChar ':' .
263 showsPrec 10 pNameLocal
264 instance IsString PName where
265 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
266 fromString s =
267 case List.break (== ':') s of
268 (_, "") -> PName Nothing $ fromString s
269 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
270 instance Hashable PName
271
272 pName :: NCName -> PName
273 pName = PName Nothing
274 {-# INLINE pName #-}
275
276 -- ** Type 'QName'
277 -- | Qualified name.
278 data QName = QName
279 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
280 , qNameLocal :: NCName -- ^ eg. "stylesheet"
281 } deriving (Eq, Ord, Generic)
282 instance Show QName where
283 showsPrec _p QName{..} =
284 (if TL.null $ unNamespace qNameSpace then id
285 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
286 ) . showsPrec 10 qNameLocal
287 instance IsString QName where
288 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
289 fromString full@('{':rest) =
290 case List.break (== '}') rest of
291 (_, "") -> error ("Invalid XML Clark notation: " <> show full)
292 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
293 fromString local = QName "" $ fromString local
294 instance Hashable QName
295
296 qName :: NCName -> QName
297 qName = QName (Namespace "")
298 {-# INLINE qName #-}
299
300 -- * Type 'Sourced'
301 data Sourced src a
302 = Sourced
303 { source :: src
304 , unSourced :: a
305 } deriving (Eq, Ord, Functor)
306 instance (Show src, Show a) => Show (Sourced src a) where
307 showsPrec p Sourced{..} =
308 showParen (p > 10) $
309 showsPrec 11 unSourced .
310 showString " @" . showsPrec 10 source
311 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
312 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
313 Sourced (FileRange fx bx ey :| lx) $
314 x<>fromPad (FilePos lines columns)<>y
315 where
316 lines = filePos_line by - filePos_line ex
317 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
318
319 -- ** Class 'FromPad'
320 class FromPad a where
321 fromPad :: FilePos -> a
322 instance FromPad T.Text where
323 fromPad FilePos{..} =
324 T.replicate filePos_line "\n" <>
325 T.replicate filePos_column " "
326 instance FromPad TL.Text where
327 fromPad FilePos{..} =
328 TL.replicate (fromIntegral filePos_line) "\n" <>
329 TL.replicate (fromIntegral filePos_column) " "
330 instance FromPad EscapedText where
331 fromPad = pure . fromPad
332 instance FromPad Escaped where
333 fromPad = EscapedPlain . fromPad
334
335 -- ** Class 'NoSource'
336 class NoSource src where
337 noSource :: src
338 instance NoSource FileSource where
339 noSource = noSource :| []
340 instance NoSource FileRange where
341 noSource = FileRange "" filePos1 filePos1
342 {-
343 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
344 mempty = sourced0 mempty
345 mappend = (<>)
346 -}
347
348 notSourced :: NoSource src => a -> Sourced src a
349 notSourced = Sourced noSource
350
351 -- * Type 'FileSource'
352 type FileSource = NonEmpty FileRange
353
354 -- ** Type 'FileRange'
355 data FileRange
356 = FileRange
357 { fileRange_file :: FilePath
358 , fileRange_begin :: FilePos
359 , fileRange_end :: FilePos
360 } deriving (Eq, Ord)
361 instance Default FileRange where
362 def = FileRange "" filePos1 filePos1
363 instance Show FileRange where
364 showsPrec _p FileRange{..} =
365 showString fileRange_file .
366 showChar '#' . showsPrec 10 fileRange_begin .
367 showChar '-' . showsPrec 10 fileRange_end
368
369 -- *** Type 'FilePos'
370 -- | Absolute text file position.
371 data FilePos = FilePos
372 { filePos_line :: {-# UNPACK #-} LineNum
373 , filePos_column :: {-# UNPACK #-} ColNum
374 } deriving (Eq, Ord)
375 instance Default FilePos where
376 def = filePos1
377 instance Show FilePos where
378 showsPrec _p FilePos{..} =
379 showsPrec 11 filePos_line .
380 showChar ':' .
381 showsPrec 11 filePos_column
382
383 filePos1 :: FilePos
384 filePos1 = FilePos 1 1
385
386 -- **** Type 'LineNum'
387 type LineNum = Int
388
389 -- **** Type 'ColNum'
390 type ColNum = Int