1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Hdoc.DTC.Read.TCT where
11 import Control.Applicative (Applicative(..))
12 import Control.Monad (Monad(..))
14 import Data.Default.Class (Default(..))
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Foldable (Foldable(..))
18 import Data.Function (($), (.), const, id)
19 import Data.Functor ((<$>), (<$))
21 import Data.List.NonEmpty (NonEmpty(..))
22 import Data.Maybe (Maybe(..), maybe)
23 import Data.Monoid (Monoid(..), First(..))
24 import Data.Ord (Ord(..))
25 import Data.Proxy (Proxy(..))
26 import Data.Ratio ((%))
27 import Data.Semigroup (Semigroup(..))
28 import Data.Sequence (ViewL(..), (|>))
29 import Data.String (String)
30 import Data.Tuple (fst, snd)
32 import Text.Read (readMaybe, Read(..))
33 import Text.Show (Show(..))
34 import qualified Control.Monad.Trans.State as S
35 import qualified Data.List as List
36 import qualified Data.Map.Strict as Map
37 import qualified Data.Ratio as Ratio
38 import qualified Data.Sequence as Seq
39 import qualified Data.Set as Set
40 import qualified Data.Text.Lazy as TL
41 import qualified GHC.Read as Read (expectP)
42 import qualified Text.Megaparsec as P
43 import qualified Text.Megaparsec.Perm as P
44 import qualified Text.Read as Read
47 import Hdoc.TCT hiding (Parser, ErrorRead)
48 import Hdoc.XML as XML
49 import qualified Hdoc.DTC.Document as DTC
50 import qualified Hdoc.DTC.Sym as DTC
51 import qualified Hdoc.RNC.Sym as RNC
52 import qualified Hdoc.TCT.Cell as TCT
56 { state_xmlPos :: DTC.XmlPos
57 , state_tctPos :: TCT.Spans
58 -- ^ Unfortunately Megaparsec's 'P.statePos'
59 -- is not a good fit to encode 'TCT.Span's.
61 instance Default State where
68 type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
70 instance RNC.Sym_Rule Parser where
71 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
73 instance RNC.Sym_RNC Parser where
74 none = P.label "none" $ P.eof
75 fail = P.label "fail" $ P.failure Nothing mempty
76 any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
77 anyElem p = P.label "anyElem" $ do
78 Cell state_tctPos (n,ts) <- P.token check $ Just expected
79 parserElement n (p n) (Cell state_tctPos ts)
81 expected = Tree (cell0 $ XmlElem "*") mempty
82 check (Tree cell@(unCell -> XmlElem e) ts) = Right $ (e,ts) <$ cell
84 ( Just $ P.Tokens $ pure t
85 , Set.singleton $ P.Tokens $ pure expected )
87 ts <- P.token check $ Just expected
90 expected = Tree (cell0 $ XmlElem n) mempty
91 check (Tree cell@(unCell -> XmlElem e) ts) | e == n = Right (ts <$ cell)
93 ( Just $ P.Tokens $ pure t
94 , Set.singleton $ P.Tokens $ pure expected )
96 v <- P.token check $ Just expected
97 parser p $ Seq.singleton $ Tree0 v
99 expected = Tree0 (cell0 $ XmlAttr n "")
100 check (Tree0 cell@(unCell -> XmlAttr k v)) | k == n =
101 Right $ XmlText v <$ cell
103 ( Just $ P.Tokens $ pure t
104 , Set.singleton $ P.Tokens $ pure expected )
108 Tree0 (unCell -> XmlComment c) :< ts -> do
110 c <$ setPosOnNextNode
111 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
112 EmptyL -> P.failure Nothing ex
114 ex = Set.singleton $ P.Tokens $ pure expected
115 expected = Tree0 (cell0 $ XmlComment "")
117 P.token check (Just expected)
120 expected = Tree0 (cell0 $ XmlText "")
121 check (Tree0 (unCell -> XmlText t)) = Right t
123 ( Just $ P.Tokens $ pure t
124 , Set.singleton $ P.Tokens $ pure expected )
125 int = RNC.rule "int" $ RNC.text >>= \t ->
126 case readMaybe (TL.unpack t) of
128 Nothing -> P.fancyFailure $
129 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
130 rational = RNC.rule "rational" $ RNC.text >>= \t ->
131 case readMaybe (TL.unpack t) of
132 Just (Rational i) | 0 <= i -> return i
133 | otherwise -> P.fancyFailure $
134 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
135 Nothing -> P.fancyFailure $
136 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
137 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
138 case readMaybe (TL.unpack t) of
139 Just (Rational i) | 0 <= i -> return i
140 | otherwise -> P.fancyFailure $
141 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
142 Nothing -> P.fancyFailure $
143 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
144 nat = RNC.rule "nat" $ RNC.int >>= \i ->
147 else P.fancyFailure $ Set.singleton $
148 P.ErrorCustom $ ErrorRead_Not_Nat i
149 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
152 else P.fancyFailure $ Set.singleton $
153 P.ErrorCustom $ ErrorRead_Not_Nat1 i
157 optional = P.optional
162 parserElement :: XmlName -> Parser a -> Cell XMLs -> Parser a
163 parserElement n p (Cell state_tctPos ts) = do
164 let mayNameOrFigureName
165 | n == "aside" = Nothing
168 -- NOTE: special case renaming the current XmlPos
169 -- using the @type attribute to have positions like this:
177 , Just ty <- getFirst $ (`foldMap` ts) $ \case
178 Tree0 (unCell -> XmlAttr "type" ty) -> First $ Just ty
180 = Just $ xmlLocalName $ ty
182 case mayNameOrFigureName of
185 S.put st{state_tctPos}
189 Just nameOrFigureName -> do
190 st@State{state_xmlPos} <- S.get
191 let incrPrecedingSibling name =
194 xmlPos_PrecedingSiblings state_xmlPos
196 { state_xmlPos = state_xmlPos
197 -- NOTE: in children, push current name incremented on ancestors
198 -- and reset preceding siblings.
199 { xmlPos_PrecedingSiblings = mempty
200 , xmlPos_Ancestors = xmlPos_Ancestors state_xmlPos |> (n, incrPrecedingSibling n)
201 , xmlPos_AncestorsWithFigureNames =
202 xmlPos_AncestorsWithFigureNames state_xmlPos |>
204 , incrPrecedingSibling nameOrFigureName )
210 { state_xmlPos = state_xmlPos
211 -- NOTE: after current, increment current name
212 -- and reset ancestors.
213 { xmlPos_PrecedingSiblings =
214 (if n == nameOrFigureName then id
215 else Map.insertWith (const succ) nameOrFigureName 1) $
216 Map.insertWith (const succ) n 1 $
217 xmlPos_PrecedingSiblings state_xmlPos
222 type instance RNC.Perm Parser = P.PermParser XMLs Parser
223 instance RNC.Sym_Interleaved Parser where
224 interleaved = P.makePermParser
229 f <$*> a = f P.<$?> ([],P.some a)
230 f <|*> a = f P.<|?> ([],P.some a)
231 instance DTC.Sym_DTC Parser where
232 posXML = S.gets state_xmlPos
233 posTCT = S.gets state_tctPos
236 DTC.Sym_DTC Parser =>
238 Either (P.ParseError (P.Token XMLs) ErrorRead) DTC.Document
239 readDTC = (fst <$>) . runParser def (P.initialPos "":|[]) DTC.document
241 -- | @runParser st pos p xs@ runs a 'Parser' @p@ to parse @xs@ entirely,
242 -- using state @st@ from position @pos@.
244 DTC.Sym_DTC Parser =>
246 NonEmpty P.SourcePos -> Parser a -> XMLs ->
247 Either (P.ParseError (P.Token XMLs) ErrorRead) (a, State)
248 runParser st pos p inp =
249 let p' = S.runStateT (p <* RNC.none) st in
251 P.runParser' p' P.State
254 case Seq.viewl inp of
255 Tree (Cell ss _) _ :< _ ->
256 (<$> ss) $ \Span{span_begin=bp, span_file} ->
257 P.SourcePos span_file
258 (P.mkPos $ pos_line bp)
259 (P.mkPos $ pos_column bp)
261 , P.stateTabWidth = P.pos1
262 , P.stateTokensProcessed = 0
265 -- | @parser p xs@ returns a 'Parser' parsing @xs@ with @p@,
266 -- applying 'setPosOnNextNode' in case of success.
268 DTC.Sym_DTC Parser =>
269 Parser a -> XMLs -> Parser a
272 P.State{P.statePos=pos} <- P.getParserState
273 case runParser st pos p xs of
274 Left (P.TrivialError statePos un ex) -> do
275 -- NOTE: just re-raising exception.
276 s <- P.getParserState
277 P.setParserState s{P.statePos}
279 Left (P.FancyError statePos errs) -> do
280 -- NOTE: just re-raising exception.
281 s <- P.getParserState
282 P.setParserState s{P.statePos}
286 a <$ setPosOnNextNode
288 -- | Adjust the current 'P.SourcePos'
289 -- to be the begining of the following-sibling 'XML' node
290 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
291 -- and thus makes useful error messages.
293 -- This is needed because the end of a 'Cell'
294 -- is not necessarily the begin of the next 'Cell'.
295 setPosOnNextNode :: Parser ()
296 setPosOnNextNode = do
299 , P.statePos = pos :| _
300 } <- P.getParserState
301 case Seq.viewl inp of
303 t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XMLs) pos t
305 instance P.Stream XMLs where
306 type Token XMLs = XML
307 type Tokens XMLs = XMLs
310 Tree (unCell -> XmlComment{}) _ :< ts -> P.take1_ ts
313 positionAt1 _s pos (Tree (Cell (Span{span_begin=Pos l c}:|_) _n) _ts) =
314 pos{ P.sourceLine = P.mkPos l
315 , P.sourceColumn = P.mkPos c }
316 positionAtN s pos ts =
318 t :< _ -> P.positionAt1 s pos t
320 advance1 _s _indent pos (Tree (Cell (Span{span_end=Pos l c}:|_) _n) _ts) =
321 -- WARNING: the end of a 'Cell' is not necessarily
322 -- the beginning of the next 'Cell'.
323 pos{ P.sourceLine = P.mkPos l
324 , P.sourceColumn = P.mkPos c }
325 advanceN s = foldl' . P.advance1 s
326 takeN_ n s | n <= 0 = Just (mempty, s)
328 | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XmlComment and XmlInclude
329 tokensToChunk _s = Seq.fromList
330 chunkToTokens _s = toList
331 chunkLength _s = Seq.length
332 takeWhile_ = Seq.spanl
333 instance P.ShowToken XML where
334 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
336 showTree :: XML -> String
337 showTree (Tree a _ts) =
339 XmlElem n -> "<"<>show n<>">"
340 XmlAttr n _v -> show n<>"="
342 XmlComment _c -> "comment"
344 showCell (Cell path@(Span{span_file} :| _) a) f =
347 else f a <> foldMap (\p -> "\n in "<>show p) path
349 -- ** Type 'ErrorRead'
351 = ErrorRead_EndOfInput
352 | ErrorRead_Not_Int TL.Text
353 | ErrorRead_Not_Nat Int
354 | ErrorRead_Not_Nat1 Int
355 | ErrorRead_Not_Rational TL.Text
356 | ErrorRead_Not_Positive TL.Text
357 -- | ErrorRead_Unexpected P.sourcePos XML
358 deriving (Eq,Ord,Show)
359 instance P.ShowErrorComponent ErrorRead where
360 showErrorComponent = show
362 -- ** Type 'Rational'
363 -- | Wrapper to change the 'Read' instance.
364 newtype Rational = Rational Ratio.Rational
365 instance Read Rational where
367 x <- Read.step readPrec
368 Read.expectP (Read.Symbol "/")
369 y <- Read.step readPrec
370 return $ Rational (x % y)