{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | Read DTC from TCT. module Hdoc.DTC.Read.TCT where import Control.Applicative (Applicative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.), const, id) import Data.Functor ((<$>), (<$)) import Data.Int (Int) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..), First(..)) import Data.Ord (Ord(..)) import Data.Proxy (Proxy(..)) import Data.Ratio ((%)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (ViewL(..), (|>)) import Data.String (String) import Data.Tuple (fst, snd) import Text.Read (readMaybe, Read(..)) import Text.Show (Show(..)) import qualified Control.Monad.Trans.State as S import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Ratio as Ratio import qualified Data.Sequence as Seq import qualified Data.Set as Set import qualified Data.Text.Lazy as TL import qualified GHC.Read as Read (expectP) import qualified Text.Megaparsec as P import qualified Text.Megaparsec.Perm as P import qualified Text.Read as Read import Hdoc.TCT hiding (Parser, ErrorRead) import Hdoc.Utils (Nat(..), Nat1(..), succNat1) import qualified Hdoc.XML as XML import qualified Hdoc.DTC.Document as DTC import qualified Hdoc.DTC.Sym as DTC import qualified Hdoc.RNC.Sym as RNC import qualified Hdoc.TCT.Cell as TCT -- * Type 'State' data State = State { state_posXML :: XML.Pos , state_locTCT :: TCT.Location -- ^ Unfortunately Megaparsec's 'P.statePos' -- is not a good fit to encode 'TCT.Location'. } deriving (Eq,Show) instance Default State where def = State { state_posXML = def , state_locTCT = def } -- * Type 'Parser' type Parser = S.StateT State (P.Parsec ErrorRead XML.XMLs) instance RNC.Sym_Rule Parser where -- rule n p = P.dbg s p {-(p P. s)-} where s = Text.unpack n rule _n = id instance RNC.Sym_RNC Parser where none = P.label "none" $ P.eof fail = P.label "fail" $ P.failure Nothing mempty any = P.label "any" $ p_satisfyMaybe $ const $ Just () anyElem p = P.label "anyElem" $ do Cell state_locTCT (n,ts) <- P.token check $ Just expected parserElement n (p n) (Cell state_locTCT ts) where expected = Tree (cell0 $ XML.NodeElem "*") mempty check (Tree cell@(unCell -> XML.NodeElem e) ts) = Right $ (e,ts) <$ cell check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) element n p = do ts <- P.token check $ Just expected parserElement n p ts where expected = Tree (cell0 $ XML.NodeElem n) mempty check (Tree cell@(unCell -> XML.NodeElem e) ts) | e == n = Right (ts <$ cell) check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) attribute n p = do v <- P.token check $ Just expected parser p $ Seq.singleton $ Tree0 v where expected = Tree0 (cell0 $ XML.NodeAttr n "") check (Tree0 cell@(unCell -> XML.NodeAttr k v)) | k == n = Right $ XML.NodeText v <$ cell check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) comment = do s <- P.getInput case Seq.viewl s of Tree0 (unCell -> XML.NodeComment c) :< ts -> do P.setInput ts c <$ setPosOnNextNode t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex EmptyL -> P.failure Nothing ex where ex = Set.singleton $ P.Tokens $ pure expected expected = Tree0 (cell0 $ XML.NodeComment "") text = do P.token check (Just expected) <* setPosOnNextNode where expected = Tree0 (cell0 $ XML.NodeText "") check (Tree0 (unCell -> XML.NodeText t)) = Right t check t = Left ( Just $ P.Tokens $ pure t , Set.singleton $ P.Tokens $ pure expected ) bool = RNC.rule "bool" $ RNC.text >>= \t -> case t of "true" -> return True "false" -> return False _ -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t int = RNC.rule "int" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just i -> return i Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t rational = RNC.rule "rational" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just (Rational i) | 0 <= i -> return i | otherwise -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just (Rational i) | 0 <= i -> return i | otherwise -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t nat = RNC.rule "nat" $ RNC.int >>= \i -> if i >= 0 then return $ Nat i else P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Nat i nat1 = RNC.rule "nat1" $ RNC.int >>= \i -> if i > 0 then return $ Nat1 i else P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Nat1 i (<|>) = (P.<|>) many = P.many some = P.some optional = P.optional option = P.option choice = P.choice try = P.try parserElement :: XML.Name -> Parser a -> Cell XML.XMLs -> Parser a parserElement n p (Cell state_locTCT ts) = do let mayNameOrFigureName | n == "aside" = Nothing -- NOTE: skip aside. | n == "figure" -- NOTE: special case renaming the current XML.XmlPos -- using the @type attribute to have positions like this: -- section1.Quote1 -- section1.Example1 -- section1.Quote2 -- instead of: -- section1.figure1 -- section1.figure2 -- section1.figure3 , Just ty <- getFirst $ (`foldMap` ts) $ \case Tree0 (unCell -> XML.NodeAttr "type" ty) -> First $ Just ty _ -> First Nothing = Just $ XML.localName $ ty | otherwise = Just n case mayNameOrFigureName of Nothing -> do st <- S.get S.put st{state_locTCT} res <- parser p ts S.put st return res Just nameOrFigureName -> do st@State{state_posXML} <- S.get let incrPrecedingSibling name = maybe (Nat1 1) succNat1 $ Map.lookup name $ XML.pos_precedingSiblings state_posXML S.put State { state_posXML = state_posXML -- NOTE: in children, push current name incremented on ancestors -- and reset preceding siblings. { XML.pos_precedingSiblings = mempty , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n) , XML.pos_ancestorsWithFigureNames = XML.pos_ancestorsWithFigureNames state_posXML |> ( nameOrFigureName , incrPrecedingSibling nameOrFigureName ) } , state_locTCT } res <- parser p ts S.put st { state_posXML = state_posXML -- NOTE: after current, increment current name -- and reset ancestors. { XML.pos_precedingSiblings = (if n == nameOrFigureName then id else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $ Map.insertWith (const succNat1) n (Nat1 1) $ XML.pos_precedingSiblings state_posXML } } return res type instance RNC.Perm Parser = P.PermParser XML.XMLs Parser instance RNC.Sym_Interleaved Parser where interleaved = P.makePermParser (<$$>) = (P.<$$>) (<||>) = (P.<||>) (<$?>) = (P.<$?>) (<|?>) = (P.<|?>) f <$*> a = f P.<$?> ([],P.some a) f <|*> a = f P.<|?> ([],P.some a) instance DTC.Sym_DTC Parser where positionXML = S.gets state_posXML locationTCT = S.gets state_locTCT readDTC :: DTC.Sym_DTC Parser => XML.XMLs -> Either (P.ParseError (P.Token XML.XMLs) ErrorRead) DTC.Document readDTC = (fst <$>) . runParser def (P.initialPos "":|[]) DTC.document -- | @runParser st pos p xs@ runs a 'Parser' @p@ to parse @xs@ entirely, -- using state @st@ from position @pos@. runParser :: DTC.Sym_DTC Parser => State -> NonEmpty P.SourcePos -> Parser a -> XML.XMLs -> Either (P.ParseError (P.Token XML.XMLs) ErrorRead) (a, State) runParser st pos p inp = let p' = S.runStateT (p <* RNC.none) st in snd $ P.runParser' p' P.State { P.stateInput = inp , P.statePos = case Seq.viewl inp of Tree (Cell ss _) _ :< _ -> (<$> ss) $ \Span{span_begin=bp, span_file} -> P.SourcePos span_file (P.mkPos $ pos_line bp) (P.mkPos $ pos_column bp) EmptyL -> pos , P.stateTabWidth = P.pos1 , P.stateTokensProcessed = 0 } -- | @parser p xs@ returns a 'Parser' parsing @xs@ with @p@, -- applying 'setPosOnNextNode' in case of success. parser :: DTC.Sym_DTC Parser => Parser a -> XML.XMLs -> Parser a parser p xs = do st <- S.get P.State{P.statePos=pos} <- P.getParserState case runParser st pos p xs of Left (P.TrivialError statePos un ex) -> do -- NOTE: just re-raising exception. s <- P.getParserState P.setParserState s{P.statePos} P.failure un ex Left (P.FancyError statePos errs) -> do -- NOTE: just re-raising exception. s <- P.getParserState P.setParserState s{P.statePos} P.fancyFailure errs Right (a, st') -> do S.put st' a <$ setPosOnNextNode -- | Adjust the current 'P.SourcePos' -- to be the begining of the following-sibling 'XML' node -- so that on error 'P.toHints' keeps expected 'P.Token's in the list, -- and thus makes useful error messages. -- -- This is needed because the end of a 'Cell' -- is not necessarily the begin of the next 'Cell'. setPosOnNextNode :: Parser () setPosOnNextNode = do P.State { P.stateInput = inp , P.statePos = pos :| _ } <- P.getParserState case Seq.viewl inp of EmptyL -> return () t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XML.XMLs) pos t instance P.Stream XML.XMLs where type Token XML.XMLs = XML.XML type Tokens XML.XMLs = XML.XMLs take1_ s = case Seq.viewl s of Tree (unCell -> XML.NodeComment{}) _ :< ts -> P.take1_ ts t: Just (t,ts) EmptyL -> Nothing positionAt1 _s pos (Tree (Cell (Span{span_begin=Pos l c}:|_) _n) _ts) = pos{ P.sourceLine = P.mkPos l , P.sourceColumn = P.mkPos c } positionAtN s pos ts = case Seq.viewl ts of t :< _ -> P.positionAt1 s pos t EmptyL -> pos advance1 _s _indent pos (Tree (Cell (Span{span_end=Pos l c}:|_) _n) _ts) = -- WARNING: the end of a 'Cell' is not necessarily -- the beginning of the next 'Cell'. pos{ P.sourceLine = P.mkPos l , P.sourceColumn = P.mkPos c } advanceN s = foldl' . P.advance1 s takeN_ n s | n <= 0 = Just (mempty, s) | null s = Nothing | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XML.NodeComment and XML.XmlInclude tokensToChunk _s = Seq.fromList chunkToTokens _s = toList chunkLength _s = Seq.length takeWhile_ = Seq.spanl instance P.ShowToken XML.XML where showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks where showTree :: XML.XML -> String showTree (Tree a _ts) = showCell a $ \case XML.NodeElem n -> "<"<>show n<>">" XML.NodeAttr n _v -> show n<>"=" XML.NodeText _t -> "text" XML.NodeComment _c -> "comment" showCell (Cell path@(Span{span_file} :| _) a) f = if null span_file then f a else f a <> foldMap (\p -> "\n in "<>show p) path -- ** Type 'ErrorRead' data ErrorRead = ErrorRead_EndOfInput | ErrorRead_Not_Bool TL.Text | ErrorRead_Not_Int TL.Text | ErrorRead_Not_Nat Int | ErrorRead_Not_Nat1 Int | ErrorRead_Not_Rational TL.Text | ErrorRead_Not_Positive TL.Text {- ErrorRead_Unexpected P.sourcePos XML -} deriving (Eq,Ord,Show) instance P.ShowErrorComponent ErrorRead where showErrorComponent = show -- ** Type 'Rational' -- | Wrapper to change the 'Read' instance. newtype Rational = Rational Ratio.Rational instance Read Rational where readPrec = do x <- Read.step readPrec Read.expectP (Read.Symbol "/") y <- Read.step readPrec return $ Rational (x % y)