1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Hdoc.DTC.Read.TCT where
10 import Control.Applicative (Applicative(..), optional)
11 import Control.Monad (Monad(..))
13 import Data.Default.Class (Default(..))
14 import Data.Either (Either(..))
15 import Data.Eq (Eq(..))
16 import Data.Foldable (Foldable(..), all)
17 import Data.Function (($), (.), const, id)
18 import Data.Functor ((<$>))
20 import Data.List.NonEmpty (NonEmpty(..))
21 import Data.Maybe (Maybe(..), maybe)
22 import Data.Monoid (Monoid(..), First(..))
23 import Data.Ord (Ord(..))
24 import Data.Ratio ((%))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence (ViewL(..), (|>))
27 import Data.String (String)
28 import Data.Tuple (fst, snd)
29 import Prelude (error)
30 import Text.Blaze.DTC (xmlns_dtc)
31 import Text.Read (readMaybe, Read(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.Char as Char
35 import qualified Data.List as List
36 import qualified Data.List.NonEmpty as NonEmpty
37 import qualified Data.Map.Strict as Map
38 import qualified Data.Ratio as Ratio
39 import qualified Data.Sequence as Seq
40 import qualified Data.Set as Set
41 import qualified Data.Text.Lazy as TL
42 import qualified GHC.Read as Read (expectP)
43 import qualified Language.Symantic.RNC as RNC
44 import qualified Language.Symantic.XML as XML
45 import qualified Text.Megaparsec as P
46 import qualified Text.Read as Read
48 import Hdoc.TCT hiding (Parser, ErrorRead)
49 import Hdoc.XML (XML, XMLs)
50 import Hdoc.Utils (Nat(..), Nat1(..), succNat1)
51 import qualified Hdoc.DTC.Document as DTC
52 import qualified Hdoc.DTC.Sym as DTC
53 import qualified Hdoc.RNC as RNC
54 import qualified Hdoc.XML as XML
55 import qualified Hdoc.TCT.Cell as TCT
60 Either (P.ParseErrorBundle XMLs ErrorRead) DTC.Document
61 readDTC stateInput = (fst <$>) $ snd $
62 P.runParser' (S.runStateT (DTC.document <* P.eof) (def::State)) P.State
66 error "[BUG] validateXML: getSourcePos is not helpful here, please use annotated source locations"
67 -- NOTE: reporting the node number is less helpful
68 -- than the source text line and number where the node is;
69 -- P.statePosState is only used by P.getSourcePos.
74 { state_posXML :: XML.Pos
75 , state_locTCT :: TCT.Location
77 instance Default State where
84 type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
86 instance RNC.Sym_Rule Parser where
87 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
90 instance RNC.Sym_RNC Parser where
91 namespace _p _n = pure ()
93 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
96 expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty
97 check (XML.Tree (XML.Sourced src (XML.NodeElem e)) ts)
99 = Just $ XML.Sourced src $ removePI $ removeXMLNS $ removeSpaces ts
102 (`Seq.filter` xs) $ \case
103 XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False
106 if (`all` xs) $ \case
107 XML.Tree (XML.unSourced -> XML.NodeText (XML.EscapedText et)) _ts ->
109 XML.EscapedPlain t -> TL.all Char.isSpace t
112 then (`Seq.filter` xs) $ \case
113 XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False
117 let (attrs,rest) = (`Seq.spanl` xs) $ \case
118 XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True
120 let attrs' = (`Seq.filter` attrs) $ \case
121 XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts ->
123 XML.QName "" "xmlns" -> False
124 XML.QName ns _l -> ns /= XML.xmlns_xmlns
129 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
132 expected = XML.Tree0 (XML.notSourced $ XML.NodeAttr n)
133 check (XML.Tree (XML.unSourced -> XML.NodeAttr k)
134 v@(toList -> [XML.Tree0 (XML.unSourced -> XML.NodeText _v)])) | k == n =
137 any = P.label "any" $
138 P.token (const $ Just ()) Set.empty
139 anyElem ns p = P.label "anyElem" $ do
140 (n,ts) <- P.token check $ Set.singleton $ P.Tokens $ pure expected
141 p_XMLs (p $ XML.qNameLocal n) ts
143 expected = XML.Tree (XML.notSourced $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty
144 check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts)
145 | XML.qNameSpace e == ns
149 P.token check $ Set.singleton $ P.Tokens $ pure expected
151 expected = XML.Tree0 (XML.notSourced $ XML.NodeText $ XML.EscapedText mempty)
152 check (XML.Tree0 (XML.unSourced -> XML.NodeText t)) = Just t
154 optional = P.optional
158 fail = P.label "fail" $ P.failure Nothing mempty
159 type instance RNC.Permutation Parser = RNC.Perm Parser
160 instance RNC.Sym_Permutation Parser where
161 runPermutation (RNC.Perm value parser) = optional parser >>= f
163 -- NOTE: copy Control.Applicative.Permutations.runPermutation
164 -- to replace the commented empty below so that P.TrivialError
165 -- has the unexpected token.
166 f Nothing = maybe {-empty-}(P.token (const Nothing) Set.empty) pure value
167 f (Just p) = RNC.runPermutation p
168 toPermutation p = RNC.Perm Nothing $ pure <$> p
169 toPermutationWithDefault v p = RNC.Perm (Just v) $ pure <$> p
171 instance P.Stream XMLs where
172 type Token XMLs = XML
173 type Tokens XMLs = XMLs
177 t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts
178 | RNC.isIgnoredNode n -> P.take1_ ts
179 | otherwise -> Just (t, ts)
180 takeN_ n s | n <= 0 = Just (mempty, s)
183 let (ns,rs) = Seq.splitAt n s in
184 let (ko,ok) = Seq.partition (RNC.isIgnoredNode . XML.unSourced . XML.unTree) ns in
185 case P.takeN_ (Seq.length ko) rs of
186 Nothing -> Just (ok, rs)
187 Just (ns',rs') -> Just (ok<>ns', rs')
188 tokensToChunk _s = Seq.fromList
189 chunkToTokens _s = toList
190 chunkLength _s = Seq.length
191 takeWhile_ = Seq.spanl
192 -- | NOTE: unimplemented: useless since each 'XML.Node' is annotated with its 'FileSource'.
193 reachOffset = error "[BUG] P.Stream XMLs: reachOffset is not helpful, please use annotated source locations"
194 -- | NOTE: useless since each 'XML.Node' is annotated with its 'FileSource'.
195 reachOffsetNoLine = error "[BUG] P.Stream XMLs: reachOffsetNoLine is not helpful, please use annotated source locations"
196 showTokens _s toks = List.intercalate ", " $ toList $ showTree <$> toks
198 showTree :: XML -> String
199 showTree (Tree a _ts) =
200 showSourced a $ \case
201 XML.NodeAttr n -> show (remove_XMLNS_DTC n)<>"="
202 XML.NodeCDATA _t -> "cdata"
203 XML.NodeComment _c -> "comment"
204 XML.NodeElem n -> "<"<>show (remove_XMLNS_DTC n)<>">"
205 XML.NodePI n _t -> "processing-instruction"<>show n
206 XML.NodeText _t -> "text"
208 | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""}
211 showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f =
212 if null fileRange_file
214 else f a <> foldMap (\p -> "\n in "<>show p) path
216 -- | @p_XMLs p xs@ returns a parser parsing @xs@ entirely with @p@,
217 -- updating 'P.stateOffset' and re-raising any exception.
218 p_XMLs :: Parser a -> XMLs -> Parser a
219 p_XMLs p stateInput = do
221 st <- P.getParserState
222 let (st', res) = P.runParser' (S.runStateT (p <* P.eof) s) P.State
223 { P.stateInput = stateInput
224 , P.stateOffset = P.stateOffset st
225 , P.statePosState = P.PosState
226 { P.pstateInput = stateInput
227 , P.pstateOffset = P.stateOffset st
228 , P.pstateSourcePos = P.pstateSourcePos $ P.statePosState st
229 , P.pstateTabWidth = P.pos1
230 , P.pstateLinePrefix = ""
233 P.updateParserState (\ps -> ps{P.stateOffset = P.stateOffset st'})
238 Left (P.ParseErrorBundle errs _) ->
239 case NonEmpty.head errs of
240 P.TrivialError _o us es -> P.failure us es
242 lift $ P.ParsecT $ \ps _cok cerr _eok _eerr ->
243 cerr (P.TrivialError o us es) ps
245 P.FancyError _o es -> P.fancyFailure es
247 p_element :: XML.QName -> Parser a -> Cell XMLs -> Parser a
248 p_element n p (Sourced state_locTCT ts) = do
249 let mayNameOrFigureName
250 | n == "aside" = Nothing
253 -- NOTE: special case renaming the current XML.Pos
254 -- using the @type attribute to have positions like this:
262 , Just ty <- getFirst $ (`foldMap` ts) $ \case
263 Tree (unSourced -> XML.NodeAttr "type") xs
264 | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs
265 , Just ty <- XML.ncName $ XML.unescapeText t
268 = Just $ XML.QName xmlns_dtc ty
270 case mayNameOrFigureName of
273 S.put st{state_locTCT}
277 Just nameOrFigureName -> do
278 st@State{state_posXML} <- S.get
279 let incrPrecedingSibling name =
280 maybe (Nat1 1) succNat1 $
282 XML.pos_precedingSiblings state_posXML
284 { state_posXML = state_posXML
285 -- NOTE: in children, push current name incremented on ancestors
286 -- and reset preceding siblings.
287 { XML.pos_precedingSiblings = mempty
288 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
289 , XML.pos_ancestorsWithFigureNames =
290 XML.pos_ancestorsWithFigureNames state_posXML |>
292 , incrPrecedingSibling nameOrFigureName )
298 { state_posXML = state_posXML
299 -- NOTE: after current, increment current name
300 -- and reset ancestors.
301 { XML.pos_precedingSiblings =
302 (if n == nameOrFigureName then id
303 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
304 Map.insertWith (const succNat1) n (Nat1 1) $
305 XML.pos_precedingSiblings state_posXML
310 instance RNC.Sym_RNC_Extra Parser where
311 none = RNC.rule "none" $ P.eof
315 Tree0 (unSourced -> XML.NodeComment c) :< ts -> do
318 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
319 EmptyL -> P.failure Nothing ex
321 ex = Set.singleton $ P.Tokens $ pure expected
322 expected = Tree0 (cell0 $ XML.NodeComment "")
323 bool = RNC.rule "bool" $ RNC.text >>= \t ->
325 "true" -> return True
326 "false" -> return False
327 _ -> P.fancyFailure $
328 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
329 int = RNC.rule "int" $ RNC.text >>= \t ->
330 case readMaybe (TL.unpack t) of
332 Nothing -> P.fancyFailure $
333 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
334 rational = RNC.rule "rational" $ RNC.text >>= \t ->
335 case readMaybe (TL.unpack t) of
336 Just (Rational i) | 0 <= i -> return i
337 | otherwise -> P.fancyFailure $
338 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
339 Nothing -> P.fancyFailure $
340 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
341 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
342 case readMaybe (TL.unpack t) of
343 Just (Rational i) | 0 <= i -> return i
344 | otherwise -> P.fancyFailure $
345 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
346 Nothing -> P.fancyFailure $
347 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
348 nat = RNC.rule "nat" $ RNC.int >>= \i ->
351 else P.fancyFailure $ Set.singleton $
352 P.ErrorCustom $ ErrorRead_Not_Nat i
353 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
356 else P.fancyFailure $ Set.singleton $
357 P.ErrorCustom $ ErrorRead_Not_Nat1 i
358 instance DTC.Sym_DTC Parser where
359 positionXML = S.gets state_posXML
360 locationTCT = S.gets state_locTCT
363 -- ** Type 'ErrorRead'
365 = ErrorRead_EndOfInput
366 | ErrorRead_Not_Bool TL.Text
367 | ErrorRead_Not_Int TL.Text
368 | ErrorRead_Not_Nat Int
369 | ErrorRead_Not_Nat1 Int
370 | ErrorRead_Not_Rational TL.Text
371 | ErrorRead_Not_Positive TL.Text
372 deriving (Eq,Ord,Show)
373 instance P.ShowErrorComponent ErrorRead where
374 showErrorComponent = show
376 -- ** Type 'Rational'
377 -- | Wrapper to change the 'Read' instance.
378 newtype Rational = Rational Ratio.Rational
379 instance Read Rational where
381 x <- Read.step readPrec
382 Read.expectP (Read.Symbol "/")
383 y <- Read.step readPrec
384 return $ Rational (x % y)