1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Hdoc.DTC.Read.TCT where
11 import Control.Applicative (Applicative(..))
12 import Control.Monad (Monad(..))
14 import Data.Default.Class (Default(..))
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Foldable (Foldable(..))
18 import Data.Function (($), (.), const, id)
19 import Data.Functor ((<$>), (<$))
21 import Data.List.NonEmpty (NonEmpty(..))
22 import Data.Maybe (Maybe(..), maybe)
23 import Data.Monoid (Monoid(..), First(..))
24 import Data.Ord (Ord(..))
25 import Data.Proxy (Proxy(..))
26 import Data.Ratio ((%))
27 import Data.Semigroup (Semigroup(..))
28 import Data.Sequence (ViewL(..), (|>))
29 import Data.String (String)
30 import Data.Tuple (fst, snd)
31 import Text.Read (readMaybe, Read(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.List as List
35 import qualified Data.Map.Strict as Map
36 import qualified Data.Ratio as Ratio
37 import qualified Data.Sequence as Seq
38 import qualified Data.Set as Set
39 import qualified Data.Text.Lazy as TL
40 import qualified GHC.Read as Read (expectP)
41 import qualified Text.Megaparsec as P
42 import qualified Text.Megaparsec.Perm as P
43 import qualified Text.Read as Read
45 import Hdoc.TCT hiding (Parser, ErrorRead)
46 import Hdoc.Utils (Nat(..), Nat1(..), succNat1)
47 import qualified Hdoc.XML as XML
48 import qualified Hdoc.DTC.Document as DTC
49 import qualified Hdoc.DTC.Sym as DTC
50 import qualified Hdoc.RNC.Sym as RNC
51 import qualified Hdoc.TCT.Cell as TCT
55 { state_posXML :: XML.Pos
56 , state_locTCT :: TCT.Location
57 -- ^ Unfortunately Megaparsec's 'P.statePos'
58 -- is not a good fit to encode 'TCT.Location'.
60 instance Default State where
67 type Parser = S.StateT State (P.Parsec ErrorRead XML.XMLs)
69 instance RNC.Sym_Rule Parser where
70 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
72 instance RNC.Sym_RNC Parser where
73 none = P.label "none" $ P.eof
74 fail = P.label "fail" $ P.failure Nothing mempty
75 any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
76 anyElem p = P.label "anyElem" $ do
77 Cell state_locTCT (n,ts) <- P.token check $ Just expected
78 parserElement n (p n) (Cell state_locTCT ts)
80 expected = Tree (cell0 $ XML.NodeElem "*") mempty
81 check (Tree cell@(unCell -> XML.NodeElem e) ts) = Right $ (e,ts) <$ cell
83 ( Just $ P.Tokens $ pure t
84 , Set.singleton $ P.Tokens $ pure expected )
86 ts <- P.token check $ Just expected
89 expected = Tree (cell0 $ XML.NodeElem n) mempty
90 check (Tree cell@(unCell -> XML.NodeElem e) ts) | e == n = Right (ts <$ cell)
92 ( Just $ P.Tokens $ pure t
93 , Set.singleton $ P.Tokens $ pure expected )
95 v <- P.token check $ Just expected
96 parser p $ Seq.singleton $ Tree0 v
98 expected = Tree0 (cell0 $ XML.NodeAttr n "")
99 check (Tree0 cell@(unCell -> XML.NodeAttr k v)) | k == n =
100 Right $ XML.NodeText v <$ cell
102 ( Just $ P.Tokens $ pure t
103 , Set.singleton $ P.Tokens $ pure expected )
107 Tree0 (unCell -> XML.NodeComment c) :< ts -> do
109 c <$ setPosOnNextNode
110 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
111 EmptyL -> P.failure Nothing ex
113 ex = Set.singleton $ P.Tokens $ pure expected
114 expected = Tree0 (cell0 $ XML.NodeComment "")
116 P.token check (Just expected)
119 expected = Tree0 (cell0 $ XML.NodeText "")
120 check (Tree0 (unCell -> XML.NodeText t)) = Right t
122 ( Just $ P.Tokens $ pure t
123 , Set.singleton $ P.Tokens $ pure expected )
124 int = RNC.rule "int" $ RNC.text >>= \t ->
125 case readMaybe (TL.unpack t) of
127 Nothing -> P.fancyFailure $
128 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
129 rational = RNC.rule "rational" $ RNC.text >>= \t ->
130 case readMaybe (TL.unpack t) of
131 Just (Rational i) | 0 <= i -> return i
132 | otherwise -> P.fancyFailure $
133 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
134 Nothing -> P.fancyFailure $
135 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
136 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
137 case readMaybe (TL.unpack t) of
138 Just (Rational i) | 0 <= i -> return i
139 | otherwise -> P.fancyFailure $
140 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
141 Nothing -> P.fancyFailure $
142 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
143 nat = RNC.rule "nat" $ RNC.int >>= \i ->
146 else P.fancyFailure $ Set.singleton $
147 P.ErrorCustom $ ErrorRead_Not_Nat i
148 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
151 else P.fancyFailure $ Set.singleton $
152 P.ErrorCustom $ ErrorRead_Not_Nat1 i
156 optional = P.optional
161 parserElement :: XML.Name -> Parser a -> Cell XML.XMLs -> Parser a
162 parserElement n p (Cell state_locTCT ts) = do
163 let mayNameOrFigureName
164 | n == "aside" = Nothing
167 -- NOTE: special case renaming the current XML.XmlPos
168 -- using the @type attribute to have positions like this:
176 , Just ty <- getFirst $ (`foldMap` ts) $ \case
177 Tree0 (unCell -> XML.NodeAttr "type" ty) -> First $ Just ty
179 = Just $ XML.localName $ ty
181 case mayNameOrFigureName of
184 S.put st{state_locTCT}
188 Just nameOrFigureName -> do
189 st@State{state_posXML} <- S.get
190 let incrPrecedingSibling name =
191 maybe (Nat1 1) succNat1 $
193 XML.pos_precedingSiblings state_posXML
195 { state_posXML = state_posXML
196 -- NOTE: in children, push current name incremented on ancestors
197 -- and reset preceding siblings.
198 { XML.pos_precedingSiblings = mempty
199 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
200 , XML.pos_ancestorsWithFigureNames =
201 XML.pos_ancestorsWithFigureNames state_posXML |>
203 , incrPrecedingSibling nameOrFigureName )
209 { state_posXML = state_posXML
210 -- NOTE: after current, increment current name
211 -- and reset ancestors.
212 { XML.pos_precedingSiblings =
213 (if n == nameOrFigureName then id
214 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
215 Map.insertWith (const succNat1) n (Nat1 1) $
216 XML.pos_precedingSiblings state_posXML
221 type instance RNC.Perm Parser = P.PermParser XML.XMLs Parser
222 instance RNC.Sym_Interleaved Parser where
223 interleaved = P.makePermParser
228 f <$*> a = f P.<$?> ([],P.some a)
229 f <|*> a = f P.<|?> ([],P.some a)
230 instance DTC.Sym_DTC Parser where
231 positionXML = S.gets state_posXML
232 locationTCT = S.gets state_locTCT
235 DTC.Sym_DTC Parser =>
237 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) DTC.Document
238 readDTC = (fst <$>) . runParser def (P.initialPos "":|[]) DTC.document
240 -- | @runParser st pos p xs@ runs a 'Parser' @p@ to parse @xs@ entirely,
241 -- using state @st@ from position @pos@.
243 DTC.Sym_DTC Parser =>
245 NonEmpty P.SourcePos -> Parser a -> XML.XMLs ->
246 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) (a, State)
247 runParser st pos p inp =
248 let p' = S.runStateT (p <* RNC.none) st in
250 P.runParser' p' P.State
253 case Seq.viewl inp of
254 Tree (Cell ss _) _ :< _ ->
255 (<$> ss) $ \Span{span_begin=bp, span_file} ->
256 P.SourcePos span_file
257 (P.mkPos $ pos_line bp)
258 (P.mkPos $ pos_column bp)
260 , P.stateTabWidth = P.pos1
261 , P.stateTokensProcessed = 0
264 -- | @parser p xs@ returns a 'Parser' parsing @xs@ with @p@,
265 -- applying 'setPosOnNextNode' in case of success.
267 DTC.Sym_DTC Parser =>
268 Parser a -> XML.XMLs -> Parser a
271 P.State{P.statePos=pos} <- P.getParserState
272 case runParser st pos p xs of
273 Left (P.TrivialError statePos un ex) -> do
274 -- NOTE: just re-raising exception.
275 s <- P.getParserState
276 P.setParserState s{P.statePos}
278 Left (P.FancyError statePos errs) -> do
279 -- NOTE: just re-raising exception.
280 s <- P.getParserState
281 P.setParserState s{P.statePos}
285 a <$ setPosOnNextNode
287 -- | Adjust the current 'P.SourcePos'
288 -- to be the begining of the following-sibling 'XML' node
289 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
290 -- and thus makes useful error messages.
292 -- This is needed because the end of a 'Cell'
293 -- is not necessarily the begin of the next 'Cell'.
294 setPosOnNextNode :: Parser ()
295 setPosOnNextNode = do
298 , P.statePos = pos :| _
299 } <- P.getParserState
300 case Seq.viewl inp of
302 t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XML.XMLs) pos t
304 instance P.Stream XML.XMLs where
305 type Token XML.XMLs = XML.XML
306 type Tokens XML.XMLs = XML.XMLs
309 Tree (unCell -> XML.NodeComment{}) _ :< ts -> P.take1_ ts
312 positionAt1 _s pos (Tree (Cell (Span{span_begin=Pos l c}:|_) _n) _ts) =
313 pos{ P.sourceLine = P.mkPos l
314 , P.sourceColumn = P.mkPos c }
315 positionAtN s pos ts =
317 t :< _ -> P.positionAt1 s pos t
319 advance1 _s _indent pos (Tree (Cell (Span{span_end=Pos l c}:|_) _n) _ts) =
320 -- WARNING: the end of a 'Cell' is not necessarily
321 -- the beginning of the next 'Cell'.
322 pos{ P.sourceLine = P.mkPos l
323 , P.sourceColumn = P.mkPos c }
324 advanceN s = foldl' . P.advance1 s
325 takeN_ n s | n <= 0 = Just (mempty, s)
327 | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XML.NodeComment and XML.XmlInclude
328 tokensToChunk _s = Seq.fromList
329 chunkToTokens _s = toList
330 chunkLength _s = Seq.length
331 takeWhile_ = Seq.spanl
332 instance P.ShowToken XML.XML where
333 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
335 showTree :: XML.XML -> String
336 showTree (Tree a _ts) =
338 XML.NodeElem n -> "<"<>show n<>">"
339 XML.NodeAttr n _v -> show n<>"="
340 XML.NodeText _t -> "text"
341 XML.NodeComment _c -> "comment"
343 showCell (Cell path@(Span{span_file} :| _) a) f =
346 else f a <> foldMap (\p -> "\n in "<>show p) path
348 -- ** Type 'ErrorRead'
350 = ErrorRead_EndOfInput
351 | ErrorRead_Not_Int TL.Text
352 | ErrorRead_Not_Nat Int
353 | ErrorRead_Not_Nat1 Int
354 | ErrorRead_Not_Rational TL.Text
355 | ErrorRead_Not_Positive TL.Text
356 {- ErrorRead_Unexpected P.sourcePos XML -}
357 deriving (Eq,Ord,Show)
358 instance P.ShowErrorComponent ErrorRead where
359 showErrorComponent = show
361 -- ** Type 'Rational'
362 -- | Wrapper to change the 'Read' instance.
363 newtype Rational = Rational Ratio.Rational
364 instance Read Rational where
366 x <- Read.step readPrec
367 Read.expectP (Read.Symbol "/")
368 y <- Read.step readPrec
369 return $ Rational (x % y)