1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Hdoc.DTC.Read.TCT where
11 import Control.Applicative (Applicative(..))
12 import Control.Monad (Monad(..))
14 import Data.Default.Class (Default(..))
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Foldable (Foldable(..))
18 import Data.Function (($), (.), const, id)
19 import Data.Functor ((<$>), (<$))
21 import Data.List.NonEmpty (NonEmpty(..))
22 import Data.Maybe (Maybe(..), maybe)
23 import Data.Monoid (Monoid(..), First(..))
24 import Data.Ord (Ord(..))
25 import Data.Proxy (Proxy(..))
26 import Data.Ratio ((%))
27 import Data.Semigroup (Semigroup(..))
28 import Data.Sequence (ViewL(..), (|>))
29 import Data.String (String)
30 import Data.Tuple (fst, snd)
31 import Text.Read (readMaybe, Read(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.List as List
35 import qualified Data.Map.Strict as Map
36 import qualified Data.Ratio as Ratio
37 import qualified Data.Sequence as Seq
38 import qualified Data.Set as Set
39 import qualified Data.Text.Lazy as TL
40 import qualified GHC.Read as Read (expectP)
41 import qualified Text.Megaparsec as P
42 import qualified Text.Megaparsec.Perm as P
43 import qualified Text.Read as Read
45 import Hdoc.TCT hiding (Parser, ErrorRead)
46 import Hdoc.Utils (Nat(..), Nat1(..), succNat1)
47 import qualified Hdoc.XML as XML
48 import qualified Hdoc.DTC.Document as DTC
49 import qualified Hdoc.DTC.Sym as DTC
50 import qualified Hdoc.RNC.Sym as RNC
51 import qualified Hdoc.TCT.Cell as TCT
55 { state_posXML :: XML.Pos
56 , state_locTCT :: TCT.Location
57 -- ^ Unfortunately Megaparsec's 'P.statePos'
58 -- is not a good fit to encode 'TCT.Location'.
60 instance Default State where
67 type Parser = S.StateT State (P.Parsec ErrorRead XML.XMLs)
69 instance RNC.Sym_Rule Parser where
70 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
72 instance RNC.Sym_RNC Parser where
73 none = P.label "none" $ P.eof
74 fail = P.label "fail" $ P.failure Nothing mempty
75 any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
76 anyElem p = P.label "anyElem" $ do
77 Cell state_locTCT (n,ts) <- P.token check $ Just expected
78 parserElement n (p n) (Cell state_locTCT ts)
80 expected = Tree (cell0 $ XML.NodeElem "*") mempty
81 check (Tree cell@(unCell -> XML.NodeElem e) ts) = Right $ (e,ts) <$ cell
83 ( Just $ P.Tokens $ pure t
84 , Set.singleton $ P.Tokens $ pure expected )
86 ts <- P.token check $ Just expected
89 expected = Tree (cell0 $ XML.NodeElem n) mempty
90 check (Tree cell@(unCell -> XML.NodeElem e) ts) | e == n = Right (ts <$ cell)
92 ( Just $ P.Tokens $ pure t
93 , Set.singleton $ P.Tokens $ pure expected )
95 v <- P.token check $ Just expected
96 parser p $ Seq.singleton $ Tree0 v
98 expected = Tree0 (cell0 $ XML.NodeAttr n "")
99 check (Tree0 cell@(unCell -> XML.NodeAttr k v)) | k == n =
100 Right $ XML.NodeText v <$ cell
102 ( Just $ P.Tokens $ pure t
103 , Set.singleton $ P.Tokens $ pure expected )
107 Tree0 (unCell -> XML.NodeComment c) :< ts -> do
109 c <$ setPosOnNextNode
110 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
111 EmptyL -> P.failure Nothing ex
113 ex = Set.singleton $ P.Tokens $ pure expected
114 expected = Tree0 (cell0 $ XML.NodeComment "")
116 P.token check (Just expected)
119 expected = Tree0 (cell0 $ XML.NodeText "")
120 check (Tree0 (unCell -> XML.NodeText t)) = Right t
122 ( Just $ P.Tokens $ pure t
123 , Set.singleton $ P.Tokens $ pure expected )
124 bool = RNC.rule "bool" $ RNC.text >>= \t ->
126 "true" -> return True
127 "false" -> return False
128 _ -> P.fancyFailure $
129 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
130 int = RNC.rule "int" $ RNC.text >>= \t ->
131 case readMaybe (TL.unpack t) of
133 Nothing -> P.fancyFailure $
134 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
135 rational = RNC.rule "rational" $ RNC.text >>= \t ->
136 case readMaybe (TL.unpack t) of
137 Just (Rational i) | 0 <= i -> return i
138 | otherwise -> P.fancyFailure $
139 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
140 Nothing -> P.fancyFailure $
141 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
142 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
143 case readMaybe (TL.unpack t) of
144 Just (Rational i) | 0 <= i -> return i
145 | otherwise -> P.fancyFailure $
146 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
147 Nothing -> P.fancyFailure $
148 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
149 nat = RNC.rule "nat" $ RNC.int >>= \i ->
152 else P.fancyFailure $ Set.singleton $
153 P.ErrorCustom $ ErrorRead_Not_Nat i
154 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
157 else P.fancyFailure $ Set.singleton $
158 P.ErrorCustom $ ErrorRead_Not_Nat1 i
162 optional = P.optional
167 parserElement :: XML.Name -> Parser a -> Cell XML.XMLs -> Parser a
168 parserElement n p (Cell state_locTCT ts) = do
169 let mayNameOrFigureName
170 | n == "aside" = Nothing
173 -- NOTE: special case renaming the current XML.XmlPos
174 -- using the @type attribute to have positions like this:
182 , Just ty <- getFirst $ (`foldMap` ts) $ \case
183 Tree0 (unCell -> XML.NodeAttr "type" ty) -> First $ Just ty
185 = Just $ XML.localName $ ty
187 case mayNameOrFigureName of
190 S.put st{state_locTCT}
194 Just nameOrFigureName -> do
195 st@State{state_posXML} <- S.get
196 let incrPrecedingSibling name =
197 maybe (Nat1 1) succNat1 $
199 XML.pos_precedingSiblings state_posXML
201 { state_posXML = state_posXML
202 -- NOTE: in children, push current name incremented on ancestors
203 -- and reset preceding siblings.
204 { XML.pos_precedingSiblings = mempty
205 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
206 , XML.pos_ancestorsWithFigureNames =
207 XML.pos_ancestorsWithFigureNames state_posXML |>
209 , incrPrecedingSibling nameOrFigureName )
215 { state_posXML = state_posXML
216 -- NOTE: after current, increment current name
217 -- and reset ancestors.
218 { XML.pos_precedingSiblings =
219 (if n == nameOrFigureName then id
220 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
221 Map.insertWith (const succNat1) n (Nat1 1) $
222 XML.pos_precedingSiblings state_posXML
227 type instance RNC.Perm Parser = P.PermParser XML.XMLs Parser
228 instance RNC.Sym_Interleaved Parser where
229 interleaved = P.makePermParser
234 f <$*> a = f P.<$?> ([],P.some a)
235 f <|*> a = f P.<|?> ([],P.some a)
236 instance DTC.Sym_DTC Parser where
237 positionXML = S.gets state_posXML
238 locationTCT = S.gets state_locTCT
241 DTC.Sym_DTC Parser =>
243 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) DTC.Document
244 readDTC = (fst <$>) . runParser def (P.initialPos "":|[]) DTC.document
246 -- | @runParser st pos p xs@ runs a 'Parser' @p@ to parse @xs@ entirely,
247 -- using state @st@ from position @pos@.
249 DTC.Sym_DTC Parser =>
251 NonEmpty P.SourcePos -> Parser a -> XML.XMLs ->
252 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) (a, State)
253 runParser st pos p inp =
254 let p' = S.runStateT (p <* RNC.none) st in
256 P.runParser' p' P.State
259 case Seq.viewl inp of
260 Tree (Cell ss _) _ :< _ ->
261 (<$> ss) $ \Span{span_begin=bp, span_file} ->
262 P.SourcePos span_file
263 (P.mkPos $ pos_line bp)
264 (P.mkPos $ pos_column bp)
266 , P.stateTabWidth = P.pos1
267 , P.stateTokensProcessed = 0
270 -- | @parser p xs@ returns a 'Parser' parsing @xs@ with @p@,
271 -- applying 'setPosOnNextNode' in case of success.
273 DTC.Sym_DTC Parser =>
274 Parser a -> XML.XMLs -> Parser a
277 P.State{P.statePos=pos} <- P.getParserState
278 case runParser st pos p xs of
279 Left (P.TrivialError statePos un ex) -> do
280 -- NOTE: just re-raising exception.
281 s <- P.getParserState
282 P.setParserState s{P.statePos}
284 Left (P.FancyError statePos errs) -> do
285 -- NOTE: just re-raising exception.
286 s <- P.getParserState
287 P.setParserState s{P.statePos}
291 a <$ setPosOnNextNode
293 -- | Adjust the current 'P.SourcePos'
294 -- to be the begining of the following-sibling 'XML' node
295 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
296 -- and thus makes useful error messages.
298 -- This is needed because the end of a 'Cell'
299 -- is not necessarily the begin of the next 'Cell'.
300 setPosOnNextNode :: Parser ()
301 setPosOnNextNode = do
304 , P.statePos = pos :| _
305 } <- P.getParserState
306 case Seq.viewl inp of
308 t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XML.XMLs) pos t
310 instance P.Stream XML.XMLs where
311 type Token XML.XMLs = XML.XML
312 type Tokens XML.XMLs = XML.XMLs
315 Tree (unCell -> XML.NodeComment{}) _ :< ts -> P.take1_ ts
318 positionAt1 _s pos (Tree (Cell (Span{span_begin=Pos l c}:|_) _n) _ts) =
319 pos{ P.sourceLine = P.mkPos l
320 , P.sourceColumn = P.mkPos c }
321 positionAtN s pos ts =
323 t :< _ -> P.positionAt1 s pos t
325 advance1 _s _indent pos (Tree (Cell (Span{span_end=Pos l c}:|_) _n) _ts) =
326 -- WARNING: the end of a 'Cell' is not necessarily
327 -- the beginning of the next 'Cell'.
328 pos{ P.sourceLine = P.mkPos l
329 , P.sourceColumn = P.mkPos c }
330 advanceN s = foldl' . P.advance1 s
331 takeN_ n s | n <= 0 = Just (mempty, s)
333 | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XML.NodeComment and XML.XmlInclude
334 tokensToChunk _s = Seq.fromList
335 chunkToTokens _s = toList
336 chunkLength _s = Seq.length
337 takeWhile_ = Seq.spanl
338 instance P.ShowToken XML.XML where
339 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
341 showTree :: XML.XML -> String
342 showTree (Tree a _ts) =
344 XML.NodeElem n -> "<"<>show n<>">"
345 XML.NodeAttr n _v -> show n<>"="
346 XML.NodeText _t -> "text"
347 XML.NodeComment _c -> "comment"
349 showCell (Cell path@(Span{span_file} :| _) a) f =
352 else f a <> foldMap (\p -> "\n in "<>show p) path
354 -- ** Type 'ErrorRead'
356 = ErrorRead_EndOfInput
357 | ErrorRead_Not_Bool TL.Text
358 | ErrorRead_Not_Int TL.Text
359 | ErrorRead_Not_Nat Int
360 | ErrorRead_Not_Nat1 Int
361 | ErrorRead_Not_Rational TL.Text
362 | ErrorRead_Not_Positive TL.Text
363 {- ErrorRead_Unexpected P.sourcePos XML -}
364 deriving (Eq,Ord,Show)
365 instance P.ShowErrorComponent ErrorRead where
366 showErrorComponent = show
368 -- ** Type 'Rational'
369 -- | Wrapper to change the 'Read' instance.
370 newtype Rational = Rational Ratio.Rational
371 instance Read Rational where
373 x <- Read.step readPrec
374 Read.expectP (Read.Symbol "/")
375 y <- Read.step readPrec
376 return $ Rational (x % y)