{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | Read DTC from TCT. module Hdoc.DTC.Read.TCT where import Control.Applicative (Applicative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.), const, id) import Data.Functor ((<$>), (<$)) import Data.Int (Int) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..), First(..)) import Data.Ord (Ord(..)) import Data.Proxy (Proxy(..)) import Data.Ratio ((%)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (ViewL(..), (|>)) import Data.String (String) import Data.Tuple (fst, snd) import Text.Blaze.DTC (xmlns_dtc) import Text.Read (readMaybe, Read(..)) import Text.Show (Show(..)) import qualified Control.Monad.Trans.State as S import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Ratio as Ratio import qualified Data.Sequence as Seq import qualified Data.Set as Set import qualified Data.Text.Lazy as TL import qualified GHC.Read as Read (expectP) import qualified Language.Symantic.RNC as RNC import qualified Language.Symantic.XML as XML import qualified Text.Megaparsec as P import qualified Text.Megaparsec.Perm as P import qualified Text.Read as Read import Hdoc.TCT hiding (Parser, ErrorRead) import Hdoc.Utils (Nat(..), Nat1(..), succNat1) import qualified Hdoc.DTC.Document as DTC import qualified Hdoc.DTC.Sym as DTC import qualified Hdoc.RNC as RNC import qualified Hdoc.XML as XML import qualified Hdoc.TCT.Cell as TCT -- * Type 'State' data State = State { state_posXML :: XML.Pos , state_locTCT :: TCT.Location -- ^ Unfortunately Megaparsec's 'P.statePos' -- is not a good fit to encode 'TCT.Location'. } deriving (Eq,Show) instance Default State where def = State { state_posXML = def , state_locTCT = def } -- * Type 'Parser' type Parser = S.StateT State (P.Parsec ErrorRead XML.XMLs) instance RNC.Sym_Rule Parser where -- rule n p = P.dbg s p {-(p P. s)-} where s = Text.unpack n rule _n = id arg _n = pure () instance RNC.Sym_RNC Parser where namespace _p _n = pure () fail = P.label "fail" $ P.failure Nothing mempty any = P.label "any" $ p_satisfyMaybe $ const $ Just () anyElem ns p = P.label "anyElem" $ do Sourced state_locTCT (n, ts) <- P.token check $ Just expected parserElement n (p $ XML.qNameLocal n) (Sourced state_locTCT ts) where expected = XML.Tree (cell0 $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty check (Tree cell@(unSourced -> XML.NodeElem e) ts) | XML.qNameSpace e == ns = Right $ (e,ts) <$ cell check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) element n p = do ts <- P.token check $ Just expected parserElement n p ts where expected = Tree (cell0 $ XML.NodeElem n) mempty check (Tree cell@(unSourced -> XML.NodeElem e) ts) | e == n = Right (ts <$ cell) check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) {- element n p = do ts <- P.token check $ Just expected parserElement n p ts where expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts) | e == n = Right $ removePI $ removeXMLNS $ removeSpaces ts where removePI xs = (`Seq.filter` xs) $ \case XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False _ -> True removeSpaces xs = if (`all` xs) $ \case XML.Tree (XML.unSourced -> XML.NodeText txt) _ts -> all (\case XML.EscapedPlain t -> TL.all Char.isSpace t _ -> False) txt _ -> True then (`Seq.filter` xs) $ \case XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False _ -> True else xs removeXMLNS xs = let (attrs,rest) = (`Seq.spanl` xs) $ \case XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True _ -> False in let attrs' = (`Seq.filter` attrs) $ \case XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts -> case a of XML.QName "" "xmlns" -> False XML.QName ns _l -> ns /= XML.xmlns_xmlns _ -> True in attrs' <> rest check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) -} attribute n p = do v <- P.token check $ Just expected parser p v where expected = Tree0 (cell0 $ XML.NodeAttr n) check (Tree (unSourced -> XML.NodeAttr k) v) | [Tree (Sourced _ (XML.NodeText _v)) _] <- toList v , k == n = Right v check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) escapedText = do P.token check (Just expected) <* setPosOnNextNode where expected = Tree0 (cell0 $ XML.NodeText mempty) check (Tree0 (unSourced -> XML.NodeText t)) = Right t check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) try = P.try optional = P.optional option = P.option choice = P.choice {- instance Alternative Parser where (<|>) = (P.<|>) many = P.many some = P.some -} instance RNC.Sym_RNC_Extra Parser where none = P.label "none" $ P.eof comment = do s <- P.getInput case Seq.viewl s of Tree0 (unSourced -> XML.NodeComment c) :< ts -> do P.setInput ts c <$ setPosOnNextNode t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex EmptyL -> P.failure Nothing ex where ex = Set.singleton $ P.Tokens $ pure expected expected = Tree0 (cell0 $ XML.NodeComment "") bool = RNC.rule "bool" $ RNC.text >>= \t -> case t of "true" -> return True "false" -> return False _ -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t int = RNC.rule "int" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just i -> return i Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t rational = RNC.rule "rational" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just (Rational i) | 0 <= i -> return i | otherwise -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just (Rational i) | 0 <= i -> return i | otherwise -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t nat = RNC.rule "nat" $ RNC.int >>= \i -> if i >= 0 then return $ Nat i else P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Nat i nat1 = RNC.rule "nat1" $ RNC.int >>= \i -> if i > 0 then return $ Nat1 i else P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Nat1 i parserElement :: XML.QName -> Parser a -> Cell XML.XMLs -> Parser a parserElement n p (Sourced state_locTCT ts) = do let mayNameOrFigureName | n == "aside" = Nothing -- NOTE: skip aside. | n == "figure" -- NOTE: special case renaming the current XML.Pos -- using the @type attribute to have positions like this: -- section1.Quote1 -- section1.Example1 -- section1.Quote2 -- instead of: -- section1.figure1 -- section1.figure2 -- section1.figure3 , Just ty <- getFirst $ (`foldMap` ts) $ \case Tree (unSourced -> XML.NodeAttr "type") xs | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs , Just ty <- XML.ncName $ XML.unescapeText t -> First $ Just ty _ -> First Nothing = Just $ XML.QName xmlns_dtc ty | otherwise = Just n case mayNameOrFigureName of Nothing -> do st <- S.get S.put st{state_locTCT} res <- parser p ts S.put st return res Just nameOrFigureName -> do st@State{state_posXML} <- S.get let incrPrecedingSibling name = maybe (Nat1 1) succNat1 $ Map.lookup name $ XML.pos_precedingSiblings state_posXML S.put State { state_posXML = state_posXML -- NOTE: in children, push current name incremented on ancestors -- and reset preceding siblings. { XML.pos_precedingSiblings = mempty , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n) , XML.pos_ancestorsWithFigureNames = XML.pos_ancestorsWithFigureNames state_posXML |> ( nameOrFigureName , incrPrecedingSibling nameOrFigureName ) } , state_locTCT } res <- parser p ts S.put st { state_posXML = state_posXML -- NOTE: after current, increment current name -- and reset ancestors. { XML.pos_precedingSiblings = (if n == nameOrFigureName then id else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $ Map.insertWith (const succNat1) n (Nat1 1) $ XML.pos_precedingSiblings state_posXML } } return res type instance RNC.Perm Parser = P.PermParser XML.XMLs Parser instance RNC.Sym_Interleaved Parser where interleaved = P.makePermParser (<$$>) = (P.<$$>) (<||>) = (P.<||>) (<$?>) = (P.<$?>) (<|?>) = (P.<|?>) f <$*> a = f P.<$?> ([],P.some a) f <|*> a = f P.<|?> ([],P.some a) instance DTC.Sym_DTC Parser where positionXML = S.gets state_posXML locationTCT = S.gets state_locTCT readDTC :: DTC.Sym_DTC Parser => XML.XMLs -> Either (P.ParseError (P.Token XML.XMLs) ErrorRead) DTC.Document readDTC = (fst <$>) . runParser def (P.initialPos "":|[]) DTC.document -- | @runParser st pos p xs@ runs a 'Parser' @p@ to parse @xs@ entirely, -- using state @st@ from position @pos@. runParser :: DTC.Sym_DTC Parser => State -> NonEmpty P.SourcePos -> Parser a -> XML.XMLs -> Either (P.ParseError (P.Token XML.XMLs) ErrorRead) (a, State) runParser st pos p inp = let p' = S.runStateT (p <* RNC.none) st in snd $ P.runParser' p' P.State { P.stateInput = inp , P.statePos = case Seq.viewl inp of Tree (Sourced ss _) _ :< _ -> (<$> ss) $ \FileRange{fileRange_begin=bp, fileRange_file} -> P.SourcePos fileRange_file (P.mkPos $ filePos_line bp) (P.mkPos $ filePos_column bp) EmptyL -> pos , P.stateTabWidth = P.pos1 , P.stateTokensProcessed = 0 } -- | @parser p xs@ returns a 'Parser' parsing @xs@ with @p@, -- applying 'setPosOnNextNode' in case of success. parser :: DTC.Sym_DTC Parser => Parser a -> XML.XMLs -> Parser a parser p xs = do st <- S.get P.State{P.statePos=pos} <- P.getParserState case runParser st pos p xs of Left (P.TrivialError statePos un ex) -> do -- NOTE: just re-raising exception. s <- P.getParserState P.setParserState s{P.statePos} P.failure un ex Left (P.FancyError statePos errs) -> do -- NOTE: just re-raising exception. s <- P.getParserState P.setParserState s{P.statePos} P.fancyFailure errs Right (a, st') -> do S.put st' a <$ setPosOnNextNode -- | Adjust the current 'P.SourcePos' -- to be the begining of the following-sibling 'XML' node -- so that on error 'P.toHints' keeps expected 'P.Token's in the list, -- and thus makes useful error messages. -- -- This is needed because the end of a 'Sourced' -- is not necessarily the begin of the next 'Sourced'. setPosOnNextNode :: Parser () setPosOnNextNode = do P.State { P.stateInput = inp , P.statePos = pos :| _ } <- P.getParserState case Seq.viewl inp of EmptyL -> return () t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XML.XMLs) pos t {- instance P.Stream XML.XMLs where type Token XML.XMLs = XML.XML type Tokens XML.XMLs = XML.XMLs take1_ s = case Seq.viewl s of Tree (unSourced -> XML.NodeComment{}) _ :< ts -> P.take1_ ts t: Just (t,ts) EmptyL -> Nothing positionAt1 _s pos (Tree (Sourced (FileRange{fileRange_begin=FilePos l c}:|_) _n) _ts) = pos{ P.sourceLine = P.mkPos l , P.sourceColumn = P.mkPos c } positionAtN s pos ts = case Seq.viewl ts of t :< _ -> P.positionAt1 s pos t EmptyL -> pos advance1 _s _indent pos (Tree (Sourced (FileRange{fileRange_end=FilePos l c}:|_) _n) _ts) = -- WARNING: the end of a 'Sourced' is not necessarily -- the beginning of the next 'Sourced'. pos{ P.sourceLine = P.mkPos l , P.sourceColumn = P.mkPos c } advanceN s = foldl' . P.advance1 s takeN_ n s | n <= 0 = Just (mempty, s) | null s = Nothing | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XML.NodeComment and XML.XmlInclude tokensToChunk _s = Seq.fromList chunkToTokens _s = toList chunkLength _s = Seq.length takeWhile_ = Seq.spanl -} instance P.ShowToken XML.XML where showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks where showTree :: XML.XML -> String showTree (Tree a _ts) = showSourced a $ \case XML.NodeAttr n -> "attribute "<>show (remove_XMLNS_DTC n) XML.NodeCDATA _t -> "cdata" XML.NodeComment _c -> "comment" XML.NodeElem n -> "element "<>show (remove_XMLNS_DTC n) XML.NodePI n _t -> "processing-instruction"<>show n XML.NodeText _t -> "text" remove_XMLNS_DTC n | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""} | otherwise = n showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f = if null fileRange_file then f a else f a <> foldMap (\p -> "\n in "<>show p) path -- ** Type 'ErrorRead' data ErrorRead = ErrorRead_EndOfInput | ErrorRead_Not_Bool TL.Text | ErrorRead_Not_Int TL.Text | ErrorRead_Not_Nat Int | ErrorRead_Not_Nat1 Int | ErrorRead_Not_Rational TL.Text | ErrorRead_Not_Positive TL.Text {- ErrorRead_Unexpected P.sourcePos XML -} deriving (Eq,Ord,Show) instance P.ShowErrorComponent ErrorRead where showErrorComponent = show -- ** Type 'Rational' -- | Wrapper to change the 'Read' instance. newtype Rational = Rational Ratio.Rational instance Read Rational where readPrec = do x <- Read.step readPrec Read.expectP (Read.Symbol "/") y <- Read.step readPrec return $ Rational (x % y)