]> Git — Sourcephile - haskell/symantic-xml.git/blob - Language/Symantic/XML/Document.hs
XML: polish comments
[haskell/symantic-xml.git] / Language / Symantic / XML / Document.hs
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE PatternSynonyms #-}
6 {-# LANGUAGE StrictData #-}
7 {-# LANGUAGE TypeFamilies #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 module Language.Symantic.XML.Document
11 ( module Language.Symantic.XML.Document
12 , TS.Tree(..)
13 , TS.Trees
14 , TS.tree0
15 ) where
16
17 import Control.Applicative (Alternative(..))
18 import Data.Bool
19 import Data.Char (Char)
20 import Data.Default.Class (Default(..))
21 import Data.Eq (Eq(..))
22 import Data.Foldable (Foldable(..), all)
23 import Data.Function (($), (.), id)
24 import Data.Functor (Functor(..), (<$>))
25 import Data.Hashable (Hashable(..))
26 import Data.Int (Int)
27 import Data.List.NonEmpty (NonEmpty(..))
28 import Data.Maybe (Maybe(..))
29 import Data.Monoid (Monoid(..))
30 import Data.Ord (Ord(..))
31 import Data.Semigroup (Semigroup(..))
32 import Data.Sequence (Seq)
33 import Data.String (String, IsString(..))
34 import GHC.Generics (Generic)
35 import Prelude ((-), error, fromIntegral)
36 import System.IO (FilePath)
37 import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
38 import qualified Data.Char.Properties.XMLCharProps as XC
39 import qualified Data.HashMap.Strict as HM
40 import qualified Data.HashSet as HS
41 import qualified Data.List as List
42 import qualified Data.Sequence as Seq
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.TreeSeq.Strict as TS
46
47 -- * Type 'XML'
48 type XML = TS.Tree (Sourced FileSource Node)
49 type XMLs = Seq XML
50
51 pattern Tree0 :: a -> TS.Tree a
52 pattern Tree0 a <- TS.Tree a (null -> True)
53 where Tree0 a = TS.Tree a Seq.empty
54
55 -- ** Type 'Node'
56 data Node
57 = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
58 | NodeAttr QName -- ^ Node with a 'NodeText' child.
59 | NodePI PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
60 | NodeText Text -- ^ Leaf.
61 | NodeComment TL.Text -- ^ Leaf.
62 | NodeCDATA TL.Text -- ^ Leaf.
63 deriving (Eq, Ord, Show)
64
65 -- ** Type 'Text'
66 type Text = [TextLexeme]
67
68 escapeText :: TL.Text -> Text
69 escapeText s =
70 case TL.span (`List.notElem` ("<>&'\""::String)) s of
71 (t, r) | TL.null t -> escape r
72 | otherwise -> TextLexemePlain t : escape r
73 where
74 escape t = case TL.uncons t of
75 Nothing -> []
76 Just (c, cs) -> escapeChar c : escapeText cs
77 escapeChar c =
78 case c of
79 '<' -> TextLexemeEntityRef entityRef_lt
80 '>' -> TextLexemeEntityRef entityRef_gt
81 '&' -> TextLexemeEntityRef entityRef_amp
82 '\'' -> TextLexemeEntityRef entityRef_apos
83 '"' -> TextLexemeEntityRef entityRef_quot
84 _ -> TextLexemePlain $ TL.singleton c
85
86 flatText :: Text -> TL.Text
87 flatText = foldMap $ \case
88 TextLexemePlain t -> t
89 TextLexemeEntityRef EntityRef{..} -> entityRef_value
90 TextLexemeCharRef (CharRef c) -> TL.singleton c
91
92 -- *** Type 'TextLexeme'
93 data TextLexeme
94 = TextLexemePlain TL.Text
95 | TextLexemeEntityRef EntityRef
96 | TextLexemeCharRef CharRef
97 deriving (Eq, Ord, Show)
98
99 -- *** Type 'EntityRef'
100 data EntityRef = EntityRef
101 { entityRef_name :: NCName
102 , entityRef_value :: TL.Text
103 } deriving (Eq, Ord, Show)
104
105 entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
106 entityRef_lt = EntityRef (NCName "lt") "<"
107 entityRef_gt = EntityRef (NCName "gt") ">"
108 entityRef_amp = EntityRef (NCName "amp") "&"
109 entityRef_quot = EntityRef (NCName "quot") "\""
110 entityRef_apos = EntityRef (NCName "apos") "'"
111
112 -- *** Type 'CharRef'
113 newtype CharRef = CharRef Char
114 deriving (Eq, Ord, Show)
115
116 -- ** Type 'Name'
117 newtype Name = Name { unName :: TL.Text }
118 deriving (Eq, Ord, Hashable)
119 instance Show Name where
120 showsPrec _p = showString . TL.unpack . unName
121 instance IsString Name where
122 fromString s
123 | c:cs <- s
124 , XC.isXmlNameStartChar c
125 && all XC.isXmlNameChar cs
126 = Name (TL.pack s)
127 | otherwise = error "Invalid XML Name"
128
129 -- ** Type 'Namespace'
130 newtype Namespace = Namespace { unNamespace :: TL.Text }
131 deriving (Eq, Ord, Show, Hashable)
132 instance IsString Namespace where
133 fromString s =
134 if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
135 then Namespace (fromString s)
136 else error $ "Invalid XML Namespace: " <> show s
137
138 xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
139 xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace"
140 xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
141 xmlns_empty = Namespace ""
142
143 -- * Type 'Namespaces'
144 data Namespaces prefix = Namespaces
145 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
146 , namespaces_default :: Namespace
147 } deriving (Show)
148 instance Default (Namespaces NCName) where
149 def = Namespaces
150 { namespaces_prefixes = HM.fromList
151 [ (xmlns_xml , "xml")
152 , (xmlns_xmlns, "xmlns")
153 ]
154 , namespaces_default = ""
155 }
156 instance Default (Namespaces (Maybe NCName)) where
157 def = Namespaces
158 { namespaces_prefixes = HM.fromList
159 [ (xmlns_xml , Just "xml")
160 , (xmlns_xmlns, Just "xmlns")
161 ]
162 , namespaces_default = ""
163 }
164 instance Semigroup (Namespaces NCName) where
165 x <> y = Namespaces
166 { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
167 , namespaces_default = namespaces_default x
168 }
169 instance Semigroup (Namespaces (Maybe NCName)) where
170 x <> y = Namespaces
171 { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
172 , namespaces_default = namespaces_default x
173 }
174 instance Monoid (Namespaces NCName) where
175 mempty = def
176 mappend = (<>)
177 instance Monoid (Namespaces (Maybe NCName)) where
178 mempty = def
179 mappend = (<>)
180
181 prefixifyQName :: Namespaces NCName -> QName -> PName
182 prefixifyQName Namespaces{..} QName{..} =
183 PName
184 { pNameSpace =
185 if qNameSpace == namespaces_default
186 then Nothing
187 else HM.lookup qNameSpace namespaces_prefixes
188 , pNameLocal = qNameLocal
189 }
190
191 -- ** Type 'NCName'
192 -- | Non-colonized name.
193 newtype NCName = NCName { unNCName :: TL.Text }
194 deriving (Eq, Ord, Hashable)
195 instance Show NCName where
196 showsPrec _p = showString . TL.unpack . unNCName
197 instance IsString NCName where
198 fromString s
199 | c:cs <- s
200 , XC.isXmlNCNameStartChar c
201 && all XC.isXmlNCNameChar cs
202 = NCName (TL.pack s)
203 | otherwise = error "Invalid XML NCName"
204
205 poolNCNames :: [NCName]
206 poolNCNames =
207 [ NCName $ TL.pack ("ns"<>show i)
208 | i <- [1 :: Int ..]
209 ]
210
211 freshNCName :: HS.HashSet NCName -> NCName
212 freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps
213
214 freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
215 freshifyNCName ns (NCName n) =
216 let ints = [1..] :: [Int] in
217 List.head
218 [ fresh
219 | suffix <- mempty : (show <$> ints)
220 , fresh <- [ NCName $ n <> TL.pack suffix]
221 , not $ fresh `HS.member` ns
222 ]
223
224 -- ** Type 'PName'
225 -- | Prefixed name.
226 data PName = PName
227 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
228 , pNameLocal :: NCName -- ^ eg. "stylesheet"
229 } deriving (Eq, Ord, Generic)
230 instance Show PName where
231 showsPrec p PName{pNameSpace=Nothing, ..} =
232 showsPrec p pNameLocal
233 showsPrec _p PName{pNameSpace=Just p, ..} =
234 showsPrec 10 p .
235 showChar ':' .
236 showsPrec 10 pNameLocal
237 instance IsString PName where
238 fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
239 fromString s =
240 case List.break (== ':') s of
241 (_, "") -> PName Nothing $ fromString s
242 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
243 instance Hashable PName
244
245 pName :: NCName -> PName
246 pName = PName Nothing
247 {-# INLINE pName #-}
248
249 -- ** Type 'QName'
250 -- | Qualified name.
251 data QName = QName
252 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
253 , qNameLocal :: NCName -- ^ eg. "stylesheet"
254 } deriving (Eq, Ord, Generic)
255 instance Show QName where
256 showsPrec _p QName{..} =
257 (if TL.null $ unNamespace qNameSpace then id
258 else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
259 ) . showsPrec 10 qNameLocal
260 instance IsString QName where
261 fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
262 fromString full@('{':rest) =
263 case List.break (== '}') rest of
264 (_, "") -> error ("Invalid XML Clark notation: " <> show full)
265 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
266 fromString local = QName "" $ fromString local
267 instance Hashable QName
268
269 qName :: NCName -> QName
270 qName = QName (Namespace "")
271 {-# INLINE qName #-}
272
273 -- * Type 'Sourced'
274 data Sourced src a
275 = Sourced
276 { source :: src
277 , unSourced :: a
278 } deriving (Eq, Ord, Functor)
279 instance (Show src, Show a) => Show (Sourced src a) where
280 showsPrec p Sourced{..} =
281 showParen (p > 10) $
282 showsPrec 11 unSourced .
283 showString " @" . showsPrec 10 source
284 instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where
285 Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y =
286 Sourced (FileRange fx bx ey :| lx) $
287 x<>fromPad (FilePos lines columns)<>y
288 where
289 lines = filePos_line by - filePos_line ex
290 columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx)
291
292 -- ** Class 'FromPad'
293 class FromPad a where
294 fromPad :: FilePos -> a
295 instance FromPad T.Text where
296 fromPad FilePos{..} =
297 T.replicate filePos_line "\n" <>
298 T.replicate filePos_column " "
299 instance FromPad TL.Text where
300 fromPad FilePos{..} =
301 TL.replicate (fromIntegral filePos_line) "\n" <>
302 TL.replicate (fromIntegral filePos_column) " "
303
304 -- ** Class 'NoSource'
305 class NoSource src where
306 noSource :: src
307 instance NoSource FileSource where
308 noSource = noSource :| []
309 instance NoSource FileRange where
310 noSource = FileRange "" filePos1 filePos1
311 {-
312 instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
313 mempty = sourced0 mempty
314 mappend = (<>)
315 -}
316
317 notSourced :: NoSource src => a -> Sourced src a
318 notSourced = Sourced noSource
319
320 -- * Type 'FileSource'
321 type FileSource = NonEmpty FileRange
322
323 -- ** Type 'FileRange'
324 data FileRange
325 = FileRange
326 { fileRange_file :: FilePath
327 , fileRange_begin :: FilePos
328 , fileRange_end :: FilePos
329 } deriving (Eq, Ord)
330 instance Default FileRange where
331 def = FileRange "" filePos1 filePos1
332 instance Show FileRange where
333 showsPrec _p FileRange{..} =
334 showString fileRange_file .
335 showChar '#' . showsPrec 10 fileRange_begin .
336 showChar '-' . showsPrec 10 fileRange_end
337
338 -- *** Type 'FilePos'
339 -- | Absolute text file position.
340 data FilePos = FilePos
341 { filePos_line :: {-# UNPACK #-} LineNum
342 , filePos_column :: {-# UNPACK #-} ColNum
343 } deriving (Eq, Ord)
344 instance Default FilePos where
345 def = filePos1
346 instance Show FilePos where
347 showsPrec _p FilePos{..} =
348 showsPrec 11 filePos_line .
349 showChar ':' .
350 showsPrec 11 filePos_column
351
352 filePos1 :: FilePos
353 filePos1 = FilePos 1 1
354
355 -- **** Type 'LineNum'
356 type LineNum = Int
357
358 -- **** Type 'ColNum'
359 type ColNum = Int