{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE StrictData #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Language.Symantic.XML.Document ( module Language.Symantic.XML.Document , TS.Tree(..) , TS.Trees , TS.tree0 ) where import Control.Applicative (Alternative(..)) import Data.Bool import Data.Char (Char) import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), all) import Data.Function (($), (.), id) import Data.Functor (Functor(..), (<$>)) import Data.Hashable (Hashable(..)) import Data.Int (Int) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq) import Data.String (String, IsString(..)) import GHC.Generics (Generic) import Prelude ((-), error, fromIntegral) import System.IO (FilePath) import Text.Show (Show(..), showsPrec, showChar, showParen, showString) import qualified Data.Char.Properties.XMLCharProps as XC import qualified Data.HashMap.Strict as HM import qualified Data.HashSet as HS import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.TreeSeq.Strict as TS -- * Type 'XML' type XML = TS.Tree (Sourced FileSource Node) type XMLs = Seq XML pattern Tree0 :: a -> TS.Tree a pattern Tree0 a <- TS.Tree a (null -> True) where Tree0 a = TS.Tree a Seq.empty -- ** Type 'Node' data Node = NodeElem QName -- ^ Node with some 'NodeAttr' and then other 'Node's as children. | NodeAttr QName -- ^ Node with a 'NodeText' child. | NodePI PName TL.Text -- ^ Leaf (except for @@ which has 'NodeAttr's. | NodeText Text -- ^ Leaf. | NodeComment TL.Text -- ^ Leaf. | NodeCDATA TL.Text -- ^ Leaf. deriving (Eq, Ord, Show) -- ** Type 'Text' type Text = [TextLexeme] escapeText :: TL.Text -> Text escapeText s = case TL.span (`List.notElem` ("<>&'\""::String)) s of (t, r) | TL.null t -> escape r | otherwise -> TextLexemePlain t : escape r where escape t = case TL.uncons t of Nothing -> [] Just (c, cs) -> escapeChar c : escapeText cs escapeChar c = case c of '<' -> TextLexemeEntityRef entityRef_lt '>' -> TextLexemeEntityRef entityRef_gt '&' -> TextLexemeEntityRef entityRef_amp '\'' -> TextLexemeEntityRef entityRef_apos '"' -> TextLexemeEntityRef entityRef_quot _ -> TextLexemePlain $ TL.singleton c flatText :: Text -> TL.Text flatText = foldMap $ \case TextLexemePlain t -> t TextLexemeEntityRef EntityRef{..} -> entityRef_value TextLexemeCharRef (CharRef c) -> TL.singleton c -- *** Type 'TextLexeme' data TextLexeme = TextLexemePlain TL.Text | TextLexemeEntityRef EntityRef | TextLexemeCharRef CharRef deriving (Eq, Ord, Show) -- *** Type 'EntityRef' data EntityRef = EntityRef { entityRef_name :: NCName , entityRef_value :: TL.Text } deriving (Eq, Ord, Show) entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef entityRef_lt = EntityRef (NCName "lt") "<" entityRef_gt = EntityRef (NCName "gt") ">" entityRef_amp = EntityRef (NCName "amp") "&" entityRef_quot = EntityRef (NCName "quot") "\"" entityRef_apos = EntityRef (NCName "apos") "'" -- *** Type 'CharRef' newtype CharRef = CharRef Char deriving (Eq, Ord, Show) -- ** Type 'Name' newtype Name = Name { unName :: TL.Text } deriving (Eq, Ord, Hashable) instance Show Name where showsPrec _p = showString . TL.unpack . unName instance IsString Name where fromString s | c:cs <- s , XC.isXmlNameStartChar c && all XC.isXmlNameChar cs = Name (TL.pack s) | otherwise = error "Invalid XML Name" -- ** Type 'Namespace' newtype Namespace = Namespace { unNamespace :: TL.Text } deriving (Eq, Ord, Show, Hashable) instance IsString Namespace where fromString s = if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s then Namespace (fromString s) else error $ "Invalid XML Namespace: " <> show s xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace xmlns_xml = Namespace "http://www.w3.org/XML/1998/namespace" xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/" xmlns_empty = Namespace "" -- * Type 'Namespaces' data Namespaces prefix = Namespaces { namespaces_prefixes :: (HM.HashMap Namespace prefix) , namespaces_default :: Namespace } deriving (Show) instance Default (Namespaces NCName) where def = Namespaces { namespaces_prefixes = HM.fromList [ (xmlns_xml , "xml") , (xmlns_xmlns, "xmlns") ] , namespaces_default = "" } instance Default (Namespaces (Maybe NCName)) where def = Namespaces { namespaces_prefixes = HM.fromList [ (xmlns_xml , Just "xml") , (xmlns_xmlns, Just "xmlns") ] , namespaces_default = "" } instance Semigroup (Namespaces NCName) where x <> y = Namespaces { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y , namespaces_default = namespaces_default x } instance Semigroup (Namespaces (Maybe NCName)) where x <> y = Namespaces { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y) , namespaces_default = namespaces_default x } instance Monoid (Namespaces NCName) where mempty = def mappend = (<>) instance Monoid (Namespaces (Maybe NCName)) where mempty = def mappend = (<>) prefixifyQName :: Namespaces NCName -> QName -> PName prefixifyQName Namespaces{..} QName{..} = PName { pNameSpace = if qNameSpace == namespaces_default then Nothing else HM.lookup qNameSpace namespaces_prefixes , pNameLocal = qNameLocal } -- ** Type 'NCName' -- | Non-colonized name. newtype NCName = NCName { unNCName :: TL.Text } deriving (Eq, Ord, Hashable) instance Show NCName where showsPrec _p = showString . TL.unpack . unNCName instance IsString NCName where fromString s | c:cs <- s , XC.isXmlNCNameStartChar c && all XC.isXmlNCNameChar cs = NCName (TL.pack s) | otherwise = error "Invalid XML NCName" poolNCNames :: [NCName] poolNCNames = [ NCName $ TL.pack ("ns"<>show i) | i <- [1 :: Int ..] ] freshNCName :: HS.HashSet NCName -> NCName freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps freshifyNCName :: HS.HashSet NCName -> NCName -> NCName freshifyNCName ns (NCName n) = let ints = [1..] :: [Int] in List.head [ fresh | suffix <- mempty : (show <$> ints) , fresh <- [ NCName $ n <> TL.pack suffix] , not $ fresh `HS.member` ns ] -- ** Type 'PName' -- | Prefixed name. data PName = PName { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml" , pNameLocal :: NCName -- ^ eg. "stylesheet" } deriving (Eq, Ord, Generic) instance Show PName where showsPrec p PName{pNameSpace=Nothing, ..} = showsPrec p pNameLocal showsPrec _p PName{pNameSpace=Just p, ..} = showsPrec 10 p . showChar ':' . showsPrec 10 pNameLocal instance IsString PName where fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error. fromString s = case List.break (== ':') s of (_, "") -> PName Nothing $ fromString s (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local) instance Hashable PName pName :: NCName -> PName pName = PName Nothing {-# INLINE pName #-} -- ** Type 'QName' -- | Qualified name. data QName = QName { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform" , qNameLocal :: NCName -- ^ eg. "stylesheet" } deriving (Eq, Ord, Generic) instance Show QName where showsPrec _p QName{..} = (if TL.null $ unNamespace qNameSpace then id else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}' ) . showsPrec 10 qNameLocal instance IsString QName where fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error. fromString full@('{':rest) = case List.break (== '}') rest of (_, "") -> error ("Invalid XML Clark notation: " <> show full) (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local fromString local = QName "" $ fromString local instance Hashable QName qName :: NCName -> QName qName = QName (Namespace "") {-# INLINE qName #-} -- * Type 'Sourced' data Sourced src a = Sourced { source :: src , unSourced :: a } deriving (Eq, Ord, Functor) instance (Show src, Show a) => Show (Sourced src a) where showsPrec p Sourced{..} = showParen (p > 10) $ showsPrec 11 unSourced . showString " @" . showsPrec 10 source instance (FromPad a, Semigroup a) => Semigroup (Sourced FileSource a) where Sourced (FileRange fx bx ex :| lx) x <> Sourced (FileRange _fy by ey :| _ly) y = Sourced (FileRange fx bx ey :| lx) $ x<>fromPad (FilePos lines columns)<>y where lines = filePos_line by - filePos_line ex columns = filePos_column by - filePos_column (if lines <= 0 then ex else bx) -- ** Class 'FromPad' class FromPad a where fromPad :: FilePos -> a instance FromPad T.Text where fromPad FilePos{..} = T.replicate filePos_line "\n" <> T.replicate filePos_column " " instance FromPad TL.Text where fromPad FilePos{..} = TL.replicate (fromIntegral filePos_line) "\n" <> TL.replicate (fromIntegral filePos_column) " " -- ** Class 'NoSource' class NoSource src where noSource :: src instance NoSource FileSource where noSource = noSource :| [] instance NoSource FileRange where noSource = FileRange "" filePos1 filePos1 {- instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where mempty = sourced0 mempty mappend = (<>) -} notSourced :: NoSource src => a -> Sourced src a notSourced = Sourced noSource -- * Type 'FileSource' type FileSource = NonEmpty FileRange -- ** Type 'FileRange' data FileRange = FileRange { fileRange_file :: FilePath , fileRange_begin :: FilePos , fileRange_end :: FilePos } deriving (Eq, Ord) instance Default FileRange where def = FileRange "" filePos1 filePos1 instance Show FileRange where showsPrec _p FileRange{..} = showString fileRange_file . showChar '#' . showsPrec 10 fileRange_begin . showChar '-' . showsPrec 10 fileRange_end -- *** Type 'FilePos' -- | Absolute text file position. data FilePos = FilePos { filePos_line :: {-# UNPACK #-} LineNum , filePos_column :: {-# UNPACK #-} ColNum } deriving (Eq, Ord) instance Default FilePos where def = filePos1 instance Show FilePos where showsPrec _p FilePos{..} = showsPrec 11 filePos_line . showChar ':' . showsPrec 11 filePos_column filePos1 :: FilePos filePos1 = FilePos 1 1 -- **** Type 'LineNum' type LineNum = Int -- **** Type 'ColNum' type ColNum = Int