{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | Read DTC from TCT. module Hdoc.DTC.Read.TCT where import Control.Applicative (Applicative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.), const, id) import Data.Functor ((<$>), (<$)) import Data.Int (Int) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..), First(..)) import Data.Ord (Ord(..)) import Data.Proxy (Proxy(..)) import Data.Ratio ((%)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (ViewL(..), (|>)) import Data.String (String) import Data.Tuple (fst, snd) import Prelude (succ) import Text.Read (readMaybe, Read(..)) import Text.Show (Show(..)) import qualified Control.Monad.Trans.State as S import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Ratio as Ratio import qualified Data.Sequence as Seq import qualified Data.Set as Set import qualified Data.Text.Lazy as TL import qualified GHC.Read as Read (expectP) import qualified Text.Megaparsec as P import qualified Text.Megaparsec.Perm as P import qualified Text.Read as Read import Hdoc.Utils () import Hdoc.TCT hiding (Parser, ErrorRead) import Hdoc.XML as XML import qualified Hdoc.DTC.Document as DTC import qualified Hdoc.DTC.Sym as DTC import qualified Hdoc.RNC.Sym as RNC import qualified Hdoc.TCT.Cell as TCT -- * Type 'State' data State = State { state_xmlPos :: DTC.XmlPos , state_tctPos :: TCT.Spans -- ^ Unfortunately Megaparsec's 'P.statePos' -- is not a good fit to encode 'TCT.Span's. } deriving (Eq,Show) instance Default State where def = State { state_xmlPos = def , state_tctPos = def } -- * Type 'Parser' type Parser = S.StateT State (P.Parsec ErrorRead XMLs) instance RNC.Sym_Rule Parser where -- rule n p = P.dbg s p {-(p P. s)-} where s = Text.unpack n rule _n = id instance RNC.Sym_RNC Parser where none = P.label "none" $ P.eof fail = P.label "fail" $ P.failure Nothing mempty any = P.label "any" $ p_satisfyMaybe $ const $ Just () anyElem p = P.label "anyElem" $ do Cell state_tctPos (n,ts) <- P.token check $ Just expected parserElement n (p n) (Cell state_tctPos ts) where expected = Tree (cell0 $ XmlElem "*") mempty check (Tree cell@(unCell -> XmlElem e) ts) = Right $ (e,ts) <$ cell check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) element n p = do ts <- P.token check $ Just expected parserElement n p ts where expected = Tree (cell0 $ XmlElem n) mempty check (Tree cell@(unCell -> XmlElem e) ts) | e == n = Right (ts <$ cell) check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) attribute n p = do v <- P.token check $ Just expected parser p $ Seq.singleton $ Tree0 v where expected = Tree0 (cell0 $ XmlAttr n "") check (Tree0 cell@(unCell -> XmlAttr k v)) | k == n = Right $ XmlText v <$ cell check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) comment = do s <- P.getInput case Seq.viewl s of Tree0 (unCell -> XmlComment c) :< ts -> do P.setInput ts c <$ setPosOnNextNode t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex EmptyL -> P.failure Nothing ex where ex = Set.singleton $ P.Tokens $ pure expected expected = Tree0 (cell0 $ XmlComment "") text = do P.token check (Just expected) <* setPosOnNextNode where expected = Tree0 (cell0 $ XmlText "") check (Tree0 (unCell -> XmlText t)) = Right t check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) int = RNC.rule "int" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just i -> return i Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t rational = RNC.rule "rational" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just (Rational i) | 0 <= i -> return i | otherwise -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just (Rational i) | 0 <= i -> return i | otherwise -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t nat = RNC.rule "nat" $ RNC.int >>= \i -> if i >= 0 then return $ Nat i else P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Nat i nat1 = RNC.rule "nat1" $ RNC.int >>= \i -> if i > 0 then return $ Nat1 i else P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Nat1 i (<|>) = (P.<|>) many = P.many some = P.some optional = P.optional option = P.option choice = P.choice try = P.try parserElement :: XmlName -> Parser a -> Cell XMLs -> Parser a parserElement n p (Cell state_tctPos ts) = do let mayNameOrFigureName | n == "aside" = Nothing -- NOTE: skip aside. | n == "figure" -- NOTE: special case renaming the current XmlPos -- using the @type attribute to have positions like this: -- section1.Quote1 -- section1.Example1 -- section1.Quote2 -- instead of: -- section1.figure1 -- section1.figure2 -- section1.figure3 , Just ty <- getFirst $ (`foldMap` ts) $ \case Tree0 (unCell -> XmlAttr "type" ty) -> First $ Just ty _ -> First Nothing = Just $ xmlLocalName $ ty | otherwise = Just n case mayNameOrFigureName of Nothing -> do st <- S.get S.put st{state_tctPos} res <- parser p ts S.put st return res Just nameOrFigureName -> do st@State{state_xmlPos} <- S.get let incrPrecedingSibling name = maybe 1 succ $ Map.lookup name $ xmlPos_PrecedingSiblings state_xmlPos S.put State { state_xmlPos = state_xmlPos -- NOTE: in children, push current name incremented on ancestors -- and reset preceding siblings. { xmlPos_PrecedingSiblings = mempty , xmlPos_Ancestors = xmlPos_Ancestors state_xmlPos |> (n, incrPrecedingSibling n) , xmlPos_AncestorsWithFigureNames = xmlPos_AncestorsWithFigureNames state_xmlPos |> ( nameOrFigureName , incrPrecedingSibling nameOrFigureName ) } , state_tctPos } res <- parser p ts S.put st { state_xmlPos = state_xmlPos -- NOTE: after current, increment current name -- and reset ancestors. { xmlPos_PrecedingSiblings = (if n == nameOrFigureName then id else Map.insertWith (const succ) nameOrFigureName 1) $ Map.insertWith (const succ) n 1 $ xmlPos_PrecedingSiblings state_xmlPos } } return res type instance RNC.Perm Parser = P.PermParser XMLs Parser instance RNC.Sym_Interleaved Parser where interleaved = P.makePermParser (<$$>) = (P.<$$>) (<||>) = (P.<||>) (<$?>) = (P.<$?>) (<|?>) = (P.<|?>) f <$*> a = f P.<$?> ([],P.some a) f <|*> a = f P.<|?> ([],P.some a) instance DTC.Sym_DTC Parser where posXML = S.gets state_xmlPos posTCT = S.gets state_tctPos readDTC :: DTC.Sym_DTC Parser => XMLs -> Either (P.ParseError (P.Token XMLs) ErrorRead) DTC.Document readDTC = (fst <$>) . runParser def (P.initialPos "":|[]) DTC.document -- | @runParser st pos p xs@ runs a 'Parser' @p@ to parse @xs@ entirely, -- using state @st@ from position @pos@. runParser :: DTC.Sym_DTC Parser => State -> NonEmpty P.SourcePos -> Parser a -> XMLs -> Either (P.ParseError (P.Token XMLs) ErrorRead) (a, State) runParser st pos p inp = let p' = S.runStateT (p <* RNC.none) st in snd $ P.runParser' p' P.State { P.stateInput = inp , P.statePos = case Seq.viewl inp of Tree (Cell ss _) _ :< _ -> (<$> ss) $ \Span{span_begin=bp, span_file} -> P.SourcePos span_file (P.mkPos $ pos_line bp) (P.mkPos $ pos_column bp) EmptyL -> pos , P.stateTabWidth = P.pos1 , P.stateTokensProcessed = 0 } -- | @parser p xs@ returns a 'Parser' parsing @xs@ with @p@, -- applying 'setPosOnNextNode' in case of success. parser :: DTC.Sym_DTC Parser => Parser a -> XMLs -> Parser a parser p xs = do st <- S.get P.State{P.statePos=pos} <- P.getParserState case runParser st pos p xs of Left (P.TrivialError statePos un ex) -> do -- NOTE: just re-raising exception. s <- P.getParserState P.setParserState s{P.statePos} P.failure un ex Left (P.FancyError statePos errs) -> do -- NOTE: just re-raising exception. s <- P.getParserState P.setParserState s{P.statePos} P.fancyFailure errs Right (a, st') -> do S.put st' a <$ setPosOnNextNode -- | Adjust the current 'P.SourcePos' -- to be the begining of the following-sibling 'XML' node -- so that on error 'P.toHints' keeps expected 'P.Token's in the list, -- and thus makes useful error messages. -- -- This is needed because the end of a 'Cell' -- is not necessarily the begin of the next 'Cell'. setPosOnNextNode :: Parser () setPosOnNextNode = do P.State { P.stateInput = inp , P.statePos = pos :| _ } <- P.getParserState case Seq.viewl inp of EmptyL -> return () t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XMLs) pos t instance P.Stream XMLs where type Token XMLs = XML type Tokens XMLs = XMLs take1_ s = case Seq.viewl s of Tree (unCell -> XmlComment{}) _ :< ts -> P.take1_ ts t: Just (t,ts) EmptyL -> Nothing positionAt1 _s pos (Tree (Cell (Span{span_begin=Pos l c}:|_) _n) _ts) = pos{ P.sourceLine = P.mkPos l , P.sourceColumn = P.mkPos c } positionAtN s pos ts = case Seq.viewl ts of t :< _ -> P.positionAt1 s pos t EmptyL -> pos advance1 _s _indent pos (Tree (Cell (Span{span_end=Pos l c}:|_) _n) _ts) = -- WARNING: the end of a 'Cell' is not necessarily -- the beginning of the next 'Cell'. pos{ P.sourceLine = P.mkPos l , P.sourceColumn = P.mkPos c } advanceN s = foldl' . P.advance1 s takeN_ n s | n <= 0 = Just (mempty, s) | null s = Nothing | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XmlComment and XmlInclude tokensToChunk _s = Seq.fromList chunkToTokens _s = toList chunkLength _s = Seq.length takeWhile_ = Seq.spanl instance P.ShowToken XML where showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks where showTree :: XML -> String showTree (Tree a _ts) = showCell a $ \case XmlElem n -> "<"<>show n<>">" XmlAttr n _v -> show n<>"=" XmlText _t -> "text" XmlComment _c -> "comment" showCell (Cell path@(Span{span_file} :| _) a) f = if null span_file then f a else f a <> foldMap (\p -> "\n in "<>show p) path -- ** Type 'ErrorRead' data ErrorRead = ErrorRead_EndOfInput | ErrorRead_Not_Int TL.Text | ErrorRead_Not_Nat Int | ErrorRead_Not_Nat1 Int | ErrorRead_Not_Rational TL.Text | ErrorRead_Not_Positive TL.Text -- | ErrorRead_Unexpected P.sourcePos XML deriving (Eq,Ord,Show) instance P.ShowErrorComponent ErrorRead where showErrorComponent = show -- ** Type 'Rational' -- | Wrapper to change the 'Read' instance. newtype Rational = Rational Ratio.Rational instance Read Rational where readPrec = do x <- Read.step readPrec Read.expectP (Read.Symbol "/") y <- Read.step readPrec return $ Rational (x % y)