1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Hdoc.DTC.Read.TCT where
11 import Control.Applicative (Applicative(..))
12 import Control.Monad (Monad(..))
14 import Data.Default.Class (Default(..))
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Foldable (Foldable(..))
18 import Data.Function (($), (.), const, id)
19 import Data.Functor ((<$>), (<$))
21 import Data.List.NonEmpty (NonEmpty(..))
22 import Data.Maybe (Maybe(..), maybe)
23 import Data.Monoid (Monoid(..), First(..))
24 import Data.Ord (Ord(..))
25 import Data.Proxy (Proxy(..))
26 import Data.Ratio ((%))
27 import Data.Semigroup (Semigroup(..))
28 import Data.Sequence (ViewL(..), (|>))
29 import Data.String (String)
30 import Data.Tuple (fst, snd)
31 import Text.Blaze.DTC (xmlns_dtc)
32 import Text.Read (readMaybe, Read(..))
33 import Text.Show (Show(..))
34 import qualified Control.Monad.Trans.State as S
35 import qualified Data.List as List
36 import qualified Data.Map.Strict as Map
37 import qualified Data.Ratio as Ratio
38 import qualified Data.Sequence as Seq
39 import qualified Data.Set as Set
40 import qualified Data.Text.Lazy as TL
41 import qualified GHC.Read as Read (expectP)
42 import qualified Language.Symantic.RNC as RNC
43 import qualified Language.Symantic.XML as XML
44 import qualified Text.Megaparsec as P
45 import qualified Text.Megaparsec.Perm as P
46 import qualified Text.Read as Read
48 import Hdoc.TCT hiding (Parser, ErrorRead)
49 import Hdoc.Utils (Nat(..), Nat1(..), succNat1)
50 import qualified Hdoc.DTC.Document as DTC
51 import qualified Hdoc.DTC.Sym as DTC
52 import qualified Hdoc.RNC as RNC
53 import qualified Hdoc.XML as XML
54 import qualified Hdoc.TCT.Cell as TCT
58 { state_posXML :: XML.Pos
59 , state_locTCT :: TCT.Location
60 -- ^ Unfortunately Megaparsec's 'P.statePos'
61 -- is not a good fit to encode 'TCT.Location'.
63 instance Default State where
70 type Parser = S.StateT State (P.Parsec ErrorRead XML.XMLs)
72 instance RNC.Sym_Rule Parser where
73 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
76 instance RNC.Sym_RNC Parser where
77 namespace _p _n = pure ()
78 fail = P.label "fail" $ P.failure Nothing mempty
79 any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
80 anyElem ns p = P.label "anyElem" $ do
81 Sourced state_locTCT (n, ts) <- P.token check $ Just expected
82 parserElement n (p $ XML.qNameLocal n) (Sourced state_locTCT ts)
84 expected = XML.Tree (cell0 $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty
85 check (Tree cell@(unSourced -> XML.NodeElem e) ts)
86 | XML.qNameSpace e == ns
87 = Right $ (e,ts) <$ cell
89 ( Just $ P.Tokens $ pure t
90 , Set.singleton $ P.Tokens $ pure expected )
92 ts <- P.token check $ Just expected
95 expected = Tree (cell0 $ XML.NodeElem n) mempty
96 check (Tree cell@(unSourced -> XML.NodeElem e) ts)
97 | e == n = Right (ts <$ cell)
99 ( Just $ P.Tokens $ pure t
100 , Set.singleton $ P.Tokens $ pure expected )
103 ts <- P.token check $ Just expected
106 expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty
107 check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts)
109 = Right $ removePI $ removeXMLNS $ removeSpaces ts
112 (`Seq.filter` xs) $ \case
113 XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False
116 if (`all` xs) $ \case
117 XML.Tree (XML.unSourced -> XML.NodeText txt) _ts ->
119 XML.EscapedPlain t -> TL.all Char.isSpace t
122 then (`Seq.filter` xs) $ \case
123 XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False
127 let (attrs,rest) = (`Seq.spanl` xs) $ \case
128 XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True
130 let attrs' = (`Seq.filter` attrs) $ \case
131 XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts ->
133 XML.QName "" "xmlns" -> False
134 XML.QName ns _l -> ns /= XML.xmlns_xmlns
138 ( Just $ P.Tokens $ pure t
139 , Set.singleton $ P.Tokens $ pure expected )
142 v <- P.token check $ Just expected
145 expected = Tree0 (cell0 $ XML.NodeAttr n)
146 check (Tree (unSourced -> XML.NodeAttr k) v)
147 | [Tree (Sourced _ (XML.NodeText _v)) _] <- toList v
150 ( Just $ P.Tokens $ pure t
151 , Set.singleton $ P.Tokens $ pure expected )
153 P.token check (Just expected)
156 expected = Tree0 (cell0 $ XML.NodeText mempty)
157 check (Tree0 (unSourced -> XML.NodeText t)) = Right t
159 ( Just $ P.Tokens $ pure t
160 , Set.singleton $ P.Tokens $ pure expected )
162 optional = P.optional
166 instance Alternative Parser where
171 instance RNC.Sym_RNC_Extra Parser where
172 none = P.label "none" $ P.eof
176 Tree0 (unSourced -> XML.NodeComment c) :< ts -> do
178 c <$ setPosOnNextNode
179 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
180 EmptyL -> P.failure Nothing ex
182 ex = Set.singleton $ P.Tokens $ pure expected
183 expected = Tree0 (cell0 $ XML.NodeComment "")
184 bool = RNC.rule "bool" $ RNC.text >>= \t ->
186 "true" -> return True
187 "false" -> return False
188 _ -> P.fancyFailure $
189 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
190 int = RNC.rule "int" $ RNC.text >>= \t ->
191 case readMaybe (TL.unpack t) of
193 Nothing -> P.fancyFailure $
194 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
195 rational = RNC.rule "rational" $ RNC.text >>= \t ->
196 case readMaybe (TL.unpack t) of
197 Just (Rational i) | 0 <= i -> return i
198 | otherwise -> P.fancyFailure $
199 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
200 Nothing -> P.fancyFailure $
201 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
202 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
203 case readMaybe (TL.unpack t) of
204 Just (Rational i) | 0 <= i -> return i
205 | otherwise -> P.fancyFailure $
206 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
207 Nothing -> P.fancyFailure $
208 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
209 nat = RNC.rule "nat" $ RNC.int >>= \i ->
212 else P.fancyFailure $ Set.singleton $
213 P.ErrorCustom $ ErrorRead_Not_Nat i
214 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
217 else P.fancyFailure $ Set.singleton $
218 P.ErrorCustom $ ErrorRead_Not_Nat1 i
220 parserElement :: XML.QName -> Parser a -> Cell XML.XMLs -> Parser a
221 parserElement n p (Sourced state_locTCT ts) = do
222 let mayNameOrFigureName
223 | n == "aside" = Nothing
226 -- NOTE: special case renaming the current XML.Pos
227 -- using the @type attribute to have positions like this:
235 , Just ty <- getFirst $ (`foldMap` ts) $ \case
236 Tree (unSourced -> XML.NodeAttr "type") xs
237 | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs
238 , Just ty <- XML.ncName $ XML.unescapeText t
241 = Just $ XML.QName xmlns_dtc ty
243 case mayNameOrFigureName of
246 S.put st{state_locTCT}
250 Just nameOrFigureName -> do
251 st@State{state_posXML} <- S.get
252 let incrPrecedingSibling name =
253 maybe (Nat1 1) succNat1 $
255 XML.pos_precedingSiblings state_posXML
257 { state_posXML = state_posXML
258 -- NOTE: in children, push current name incremented on ancestors
259 -- and reset preceding siblings.
260 { XML.pos_precedingSiblings = mempty
261 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
262 , XML.pos_ancestorsWithFigureNames =
263 XML.pos_ancestorsWithFigureNames state_posXML |>
265 , incrPrecedingSibling nameOrFigureName )
271 { state_posXML = state_posXML
272 -- NOTE: after current, increment current name
273 -- and reset ancestors.
274 { XML.pos_precedingSiblings =
275 (if n == nameOrFigureName then id
276 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
277 Map.insertWith (const succNat1) n (Nat1 1) $
278 XML.pos_precedingSiblings state_posXML
283 type instance RNC.Perm Parser = P.PermParser XML.XMLs Parser
284 instance RNC.Sym_Interleaved Parser where
285 interleaved = P.makePermParser
290 f <$*> a = f P.<$?> ([],P.some a)
291 f <|*> a = f P.<|?> ([],P.some a)
292 instance DTC.Sym_DTC Parser where
293 positionXML = S.gets state_posXML
294 locationTCT = S.gets state_locTCT
297 DTC.Sym_DTC Parser =>
299 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) DTC.Document
300 readDTC = (fst <$>) . runParser def (P.initialPos "":|[]) DTC.document
302 -- | @runParser st pos p xs@ runs a 'Parser' @p@ to parse @xs@ entirely,
303 -- using state @st@ from position @pos@.
305 DTC.Sym_DTC Parser =>
307 NonEmpty P.SourcePos -> Parser a -> XML.XMLs ->
308 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) (a, State)
309 runParser st pos p inp =
310 let p' = S.runStateT (p <* RNC.none) st in
312 P.runParser' p' P.State
315 case Seq.viewl inp of
316 Tree (Sourced ss _) _ :< _ ->
317 (<$> ss) $ \FileRange{fileRange_begin=bp, fileRange_file} ->
318 P.SourcePos fileRange_file
319 (P.mkPos $ filePos_line bp)
320 (P.mkPos $ filePos_column bp)
322 , P.stateTabWidth = P.pos1
323 , P.stateTokensProcessed = 0
326 -- | @parser p xs@ returns a 'Parser' parsing @xs@ with @p@,
327 -- applying 'setPosOnNextNode' in case of success.
329 DTC.Sym_DTC Parser =>
330 Parser a -> XML.XMLs -> Parser a
333 P.State{P.statePos=pos} <- P.getParserState
334 case runParser st pos p xs of
335 Left (P.TrivialError statePos un ex) -> do
336 -- NOTE: just re-raising exception.
337 s <- P.getParserState
338 P.setParserState s{P.statePos}
340 Left (P.FancyError statePos errs) -> do
341 -- NOTE: just re-raising exception.
342 s <- P.getParserState
343 P.setParserState s{P.statePos}
347 a <$ setPosOnNextNode
349 -- | Adjust the current 'P.SourcePos'
350 -- to be the begining of the following-sibling 'XML' node
351 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
352 -- and thus makes useful error messages.
354 -- This is needed because the end of a 'Sourced'
355 -- is not necessarily the begin of the next 'Sourced'.
356 setPosOnNextNode :: Parser ()
357 setPosOnNextNode = do
360 , P.statePos = pos :| _
361 } <- P.getParserState
362 case Seq.viewl inp of
364 t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XML.XMLs) pos t
367 instance P.Stream XML.XMLs where
368 type Token XML.XMLs = XML.XML
369 type Tokens XML.XMLs = XML.XMLs
372 Tree (unSourced -> XML.NodeComment{}) _ :< ts -> P.take1_ ts
375 positionAt1 _s pos (Tree (Sourced (FileRange{fileRange_begin=FilePos l c}:|_) _n) _ts) =
376 pos{ P.sourceLine = P.mkPos l
377 , P.sourceColumn = P.mkPos c }
378 positionAtN s pos ts =
380 t :< _ -> P.positionAt1 s pos t
382 advance1 _s _indent pos (Tree (Sourced (FileRange{fileRange_end=FilePos l c}:|_) _n) _ts) =
383 -- WARNING: the end of a 'Sourced' is not necessarily
384 -- the beginning of the next 'Sourced'.
385 pos{ P.sourceLine = P.mkPos l
386 , P.sourceColumn = P.mkPos c }
387 advanceN s = foldl' . P.advance1 s
388 takeN_ n s | n <= 0 = Just (mempty, s)
390 | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XML.NodeComment and XML.XmlInclude
391 tokensToChunk _s = Seq.fromList
392 chunkToTokens _s = toList
393 chunkLength _s = Seq.length
394 takeWhile_ = Seq.spanl
396 instance P.ShowToken XML.XML where
397 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
399 showTree :: XML.XML -> String
400 showTree (Tree a _ts) =
401 showSourced a $ \case
402 XML.NodeAttr n -> "attribute "<>show (remove_XMLNS_DTC n)
403 XML.NodeCDATA _t -> "cdata"
404 XML.NodeComment _c -> "comment"
405 XML.NodeElem n -> "element "<>show (remove_XMLNS_DTC n)
406 XML.NodePI n _t -> "processing-instruction"<>show n
407 XML.NodeText _t -> "text"
409 | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""}
412 showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f =
413 if null fileRange_file
415 else f a <> foldMap (\p -> "\n in "<>show p) path
417 -- ** Type 'ErrorRead'
419 = ErrorRead_EndOfInput
420 | ErrorRead_Not_Bool TL.Text
421 | ErrorRead_Not_Int TL.Text
422 | ErrorRead_Not_Nat Int
423 | ErrorRead_Not_Nat1 Int
424 | ErrorRead_Not_Rational TL.Text
425 | ErrorRead_Not_Positive TL.Text
426 {- ErrorRead_Unexpected P.sourcePos XML -}
427 deriving (Eq,Ord,Show)
428 instance P.ShowErrorComponent ErrorRead where
429 showErrorComponent = show
431 -- ** Type 'Rational'
432 -- | Wrapper to change the 'Read' instance.
433 newtype Rational = Rational Ratio.Rational
434 instance Read Rational where
436 x <- Read.step readPrec
437 Read.expectP (Read.Symbol "/")
438 y <- Read.step readPrec
439 return $ Rational (x % y)