{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | Read DTC from TCT. module Textphile.DTC.Read.TCT where import Control.Applicative (Applicative(..), optional) import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Either (Either(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), all) import Data.Function (($), (.), const, id) import Data.Functor ((<$>)) import Data.Int (Int) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..), First(..)) import Data.Ord (Ord(..)) import Data.Ratio ((%)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (ViewL(..), (|>)) import Data.String (String) import Data.Tuple (fst, snd) import Prelude (error) import Text.Blaze.DTC (xmlns_dtc) import Text.Read (readMaybe, Read(..)) import Text.Show (Show(..)) import qualified Control.Monad.Trans.State as S import qualified Data.Char as Char import qualified Data.List as List import qualified Data.List.NonEmpty as NonEmpty import qualified Data.Map.Strict as Map import qualified Data.Ratio as Ratio import qualified Data.Sequence as Seq import qualified Data.Set as Set import qualified Data.Text.Lazy as TL import qualified GHC.Read as Read (expectP) import qualified Symantic.RNC as RNC import qualified Symantic.XML as XML import qualified Text.Megaparsec as P import qualified Text.Read as Read import Textphile.TCT hiding (Parser, ErrorRead) import Textphile.XML (XML, XMLs) import Textphile.Utils (Nat(..), Nat1(..), succNat1) import qualified Textphile.DTC.Document as DTC import qualified Textphile.DTC.Sym as DTC import qualified Textphile.RNC as RNC import qualified Textphile.XML as XML import qualified Textphile.TCT.Cell as TCT readDTC :: DTC.Sym_DTC Parser => XMLs -> Either (P.ParseErrorBundle XMLs ErrorRead) DTC.Document readDTC stateInput = (fst <$>) $ snd $ P.runParser' (S.runStateT (DTC.document <* P.eof) (def::State)) P.State { P.stateInput , P.stateOffset = 0 , P.stateParseErrors = [] , P.statePosState = error "[BUG] validateXML: getSourcePos is not helpful here, please use annotated source locations" -- NOTE: reporting the node number is less helpful -- than the source text line and number where the node is; -- P.statePosState is only used by P.getSourcePos. } -- * Type 'State' data State = State { state_posXML :: XML.Pos , state_locTCT :: TCT.Location } deriving (Eq,Show) instance Default State where def = State { state_posXML = def , state_locTCT = def } -- * Type 'Parser' type Parser = S.StateT State (P.Parsec ErrorRead XMLs) instance RNC.Sym_Rule Parser where -- rule n p = P.dbg s p {-(p P. s)-} where s = Text.unpack n rule _n = id arg _n = pure () instance RNC.Sym_RNC Parser where namespace _p _n = pure () element n p = do ts <- P.token check (Set.singleton $ P.Tokens $ pure expected) p_element n p ts where expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty check (XML.Tree (XML.Sourced src (XML.NodeElem e)) ts) | e == n = Just $ XML.Sourced src $ removePI $ removeXMLNS $ removeSpaces ts where removePI xs = (`Seq.filter` xs) $ \case XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False _ -> True removeSpaces xs = if (`all` xs) $ \case XML.Tree (XML.unSourced -> XML.NodeText (XML.EscapedText et)) _ts -> all (\case XML.EscapedPlain t -> TL.all Char.isSpace t _ -> False) et _ -> True then (`Seq.filter` xs) $ \case XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False _ -> True else xs removeXMLNS xs = let (attrs,rest) = (`Seq.spanl` xs) $ \case XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True _ -> False in let attrs' = (`Seq.filter` attrs) $ \case XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts -> case a of XML.QName "" "xmlns" -> False XML.QName ns _l -> ns /= XML.xmlns_xmlns _ -> True in attrs' <> rest check _t = Nothing attribute n p = do ts <- P.token check (Set.singleton $ P.Tokens $ pure expected) p_XMLs p ts where expected = XML.Tree0 (XML.notSourced $ XML.NodeAttr n) check (XML.Tree (XML.unSourced -> XML.NodeAttr k) v@(toList -> [XML.Tree0 (XML.unSourced -> XML.NodeText _v)])) | k == n = Just v check _t = Nothing any = P.label "any" $ P.token (const $ Just ()) Set.empty anyElem ns p = P.label "anyElem" $ do (n,ts) <- P.token check $ Set.singleton $ P.Tokens $ pure expected p_XMLs (p $ XML.qNameLocal n) ts where expected = XML.Tree (XML.notSourced $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts) | XML.qNameSpace e == ns = Just $ (e,ts) check _t = Nothing escapedText = do P.token check $ Set.singleton $ P.Tokens $ pure expected where expected = XML.Tree0 (XML.notSourced $ XML.NodeText $ XML.EscapedText mempty) check (XML.Tree0 (XML.unSourced -> XML.NodeText t)) = Just t check _t = Nothing optional = P.optional option = P.option choice = P.choice try = P.try fail = P.label "fail" $ P.failure Nothing mempty type instance RNC.Permutation Parser = RNC.Perm Parser instance RNC.Sym_Permutation Parser where runPermutation (RNC.Perm value parser) = optional parser >>= f where -- NOTE: copy Control.Applicative.Permutations.runPermutation -- to replace the commented empty below so that P.TrivialError -- has the unexpected token. f Nothing = maybe {-empty-}(P.token (const Nothing) Set.empty) pure value f (Just p) = RNC.runPermutation p toPermutation p = RNC.Perm Nothing $ pure <$> p toPermutationWithDefault v p = RNC.Perm (Just v) $ pure <$> p instance P.Stream XMLs where type Token XMLs = XML type Tokens XMLs = XMLs take1_ s = case Seq.viewl s of EmptyL -> Nothing t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts | RNC.isIgnoredNode n -> P.take1_ ts | otherwise -> Just (t, ts) takeN_ n s | n <= 0 = Just (mempty, s) | null s = Nothing | otherwise = let (ns,rs) = Seq.splitAt n s in let (ko,ok) = Seq.partition (RNC.isIgnoredNode . XML.unSourced . XML.unTree) ns in case P.takeN_ (Seq.length ko) rs of Nothing -> Just (ok, rs) Just (ns',rs') -> Just (ok<>ns', rs') tokensToChunk _s = Seq.fromList chunkToTokens _s = toList chunkLength _s = Seq.length takeWhile_ = Seq.spanl -- | NOTE: unimplemented: useless since each 'XML.Node' is annotated with its 'FileSource'. reachOffset = error "[BUG] P.Stream XMLs: reachOffset is not helpful, please use annotated source locations" -- | NOTE: useless since each 'XML.Node' is annotated with its 'FileSource'. reachOffsetNoLine = error "[BUG] P.Stream XMLs: reachOffsetNoLine is not helpful, please use annotated source locations" showTokens _s toks = List.intercalate ", " $ toList $ showTree <$> toks where showTree :: XML -> String showTree (Tree a _ts) = showSourced a $ \case XML.NodeAttr n -> show (remove_XMLNS_DTC n)<>"=" XML.NodeCDATA _t -> "cdata" XML.NodeComment _c -> "comment" XML.NodeElem n -> "<"<>show (remove_XMLNS_DTC n)<>">" XML.NodePI n _t -> "processing-instruction"<>show n XML.NodeText _t -> "text" remove_XMLNS_DTC n | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""} | otherwise = n showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f = if null fileRange_file then f a else f a <> foldMap (\p -> "\n in "<>show p) path -- | @p_XMLs p xs@ returns a parser parsing @xs@ entirely with @p@, -- updating 'P.stateOffset' and re-raising any exception. p_XMLs :: Parser a -> XMLs -> Parser a p_XMLs p stateInput = do s <- S.get st <- P.getParserState let (st', res) = P.runParser' (S.runStateT (p <* P.eof) s) P.State { P.stateInput = stateInput , P.stateOffset = P.stateOffset st , P.statePosState = P.PosState { P.pstateInput = stateInput , P.pstateOffset = P.stateOffset st , P.pstateSourcePos = P.pstateSourcePos $ P.statePosState st , P.pstateTabWidth = P.pos1 , P.pstateLinePrefix = "" } } P.updateParserState (\ps -> ps{P.stateOffset = P.stateOffset st'}) case res of Right (a, s') -> do S.put s' return a Left (P.ParseErrorBundle errs _) -> case NonEmpty.head errs of P.TrivialError _o us es -> P.failure us es {- lift $ P.ParsecT $ \ps _cok cerr _eok _eerr -> cerr (P.TrivialError o us es) ps -} P.FancyError _o es -> P.fancyFailure es p_element :: XML.QName -> Parser a -> Cell XMLs -> Parser a p_element n p (Sourced state_locTCT ts) = do let mayNameOrFigureName | n == "aside" = Nothing -- NOTE: skip aside. | n == "figure" -- NOTE: special case renaming the current XML.Pos -- using the @type attribute to have positions like this: -- section1.Quote1 -- section1.Example1 -- section1.Quote2 -- instead of: -- section1.figure1 -- section1.figure2 -- section1.figure3 , Just ty <- getFirst $ (`foldMap` ts) $ \case Tree (unSourced -> XML.NodeAttr "type") xs | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs , Just ty <- XML.ncName $ XML.unescapeText t -> First $ Just ty _ -> First Nothing = Just $ XML.QName xmlns_dtc ty | otherwise = Just n case mayNameOrFigureName of Nothing -> do st <- S.get S.put st{state_locTCT} res <- p_XMLs p ts S.put st return res Just nameOrFigureName -> do st@State{state_posXML} <- S.get let incrPrecedingSibling name = maybe (Nat1 1) succNat1 $ Map.lookup name $ XML.pos_precedingSiblings state_posXML S.put State { state_posXML = state_posXML -- NOTE: in children, push current name incremented on ancestors -- and reset preceding siblings. { XML.pos_precedingSiblings = mempty , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n) , XML.pos_ancestorsWithFigureNames = XML.pos_ancestorsWithFigureNames state_posXML |> ( nameOrFigureName , incrPrecedingSibling nameOrFigureName ) } , state_locTCT } res <- p_XMLs p ts S.put st { state_posXML = state_posXML -- NOTE: after current, increment current name -- and reset ancestors. { XML.pos_precedingSiblings = (if n == nameOrFigureName then id else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $ Map.insertWith (const succNat1) n (Nat1 1) $ XML.pos_precedingSiblings state_posXML } } return res instance RNC.Sym_RNC_Extra Parser where none = RNC.rule "none" $ P.eof comment = do s <- P.getInput case Seq.viewl s of Tree0 (unSourced -> XML.NodeComment c) :< ts -> do P.setInput ts return c t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex EmptyL -> P.failure Nothing ex where ex = Set.singleton $ P.Tokens $ pure expected expected = Tree0 (cell0 $ XML.NodeComment "") bool = RNC.rule "bool" $ RNC.text >>= \t -> case t of "true" -> return True "false" -> return False _ -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t int = RNC.rule "int" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just i -> return i Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t rational = RNC.rule "rational" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just (Rational i) | 0 <= i -> return i | otherwise -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t -> case readMaybe (TL.unpack t) of Just (Rational i) | 0 <= i -> return i | otherwise -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t Nothing -> P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t nat = RNC.rule "nat" $ RNC.int >>= \i -> if i >= 0 then return $ Nat i else P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Nat i nat1 = RNC.rule "nat1" $ RNC.int >>= \i -> if i > 0 then return $ Nat1 i else P.fancyFailure $ Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Nat1 i instance DTC.Sym_DTC Parser where positionXML = S.gets state_posXML locationTCT = S.gets state_locTCT -- ** Type 'ErrorRead' data ErrorRead = ErrorRead_EndOfInput | ErrorRead_Not_Bool TL.Text | ErrorRead_Not_Int TL.Text | ErrorRead_Not_Nat Int | ErrorRead_Not_Nat1 Int | ErrorRead_Not_Rational TL.Text | ErrorRead_Not_Positive TL.Text deriving (Eq,Ord,Show) instance P.ShowErrorComponent ErrorRead where showErrorComponent = show -- ** Type 'Rational' -- | Wrapper to change the 'Read' instance. newtype Rational = Rational Ratio.Rational instance Read Rational where readPrec = do x <- Read.step readPrec Read.expectP (Read.Symbol "/") y <- Read.step readPrec return $ Rational (x % y)