1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Textphile.DTC.Read.TCT where
10 import Control.Applicative (Applicative(..), optional)
11 import Control.Monad (Monad(..))
13 import Data.Default.Class (Default(..))
14 import Data.Either (Either(..))
15 import Data.Eq (Eq(..))
16 import Data.Foldable (Foldable(..), all)
17 import Data.Function (($), (.), const, id)
18 import Data.Functor ((<$>))
20 import Data.List.NonEmpty (NonEmpty(..))
21 import Data.Maybe (Maybe(..), maybe)
22 import Data.Monoid (Monoid(..), First(..))
23 import Data.Ord (Ord(..))
24 import Data.Ratio ((%))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence (ViewL(..), (|>))
27 import Data.String (String)
28 import Data.Tuple (fst, snd)
29 import Prelude (error)
30 import Text.Blaze.DTC (xmlns_dtc)
31 import Text.Read (readMaybe, Read(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.Char as Char
35 import qualified Data.List as List
36 import qualified Data.List.NonEmpty as NonEmpty
37 import qualified Data.Map.Strict as Map
38 import qualified Data.Ratio as Ratio
39 import qualified Data.Sequence as Seq
40 import qualified Data.Set as Set
41 import qualified Data.Text.Lazy as TL
42 import qualified GHC.Read as Read (expectP)
43 import qualified Symantic.RNC as RNC
44 import qualified Symantic.XML as XML
45 import qualified Text.Megaparsec as P
46 import qualified Text.Read as Read
48 import Textphile.TCT hiding (Parser, ErrorRead)
49 import Textphile.XML (XML, XMLs)
50 import Textphile.Utils (Nat(..), Nat1(..), succNat1)
51 import qualified Textphile.DTC.Document as DTC
52 import qualified Textphile.DTC.Sym as DTC
53 import qualified Textphile.RNC as RNC
54 import qualified Textphile.XML as XML
55 import qualified Textphile.TCT.Cell as TCT
60 Either (P.ParseErrorBundle XMLs ErrorRead) DTC.Document
61 readDTC stateInput = (fst <$>) $ snd $
62 P.runParser' (S.runStateT (DTC.document <* P.eof) (def::State)) P.State
65 , P.stateParseErrors = []
67 error "[BUG] validateXML: getSourcePos is not helpful here, please use annotated source locations"
68 -- NOTE: reporting the node number is less helpful
69 -- than the source text line and number where the node is;
70 -- P.statePosState is only used by P.getSourcePos.
75 { state_posXML :: XML.Pos
76 , state_locTCT :: TCT.Location
78 instance Default State where
85 type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
87 instance RNC.Sym_Rule Parser where
88 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
91 instance RNC.Sym_RNC Parser where
92 namespace _p _n = pure ()
94 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
97 expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty
98 check (XML.Tree (XML.Sourced src (XML.NodeElem e)) ts)
100 = Just $ XML.Sourced src $ removePI $ removeXMLNS $ removeSpaces ts
103 (`Seq.filter` xs) $ \case
104 XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False
107 if (`all` xs) $ \case
108 XML.Tree (XML.unSourced -> XML.NodeText (XML.EscapedText et)) _ts ->
110 XML.EscapedPlain t -> TL.all Char.isSpace t
113 then (`Seq.filter` xs) $ \case
114 XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False
118 let (attrs,rest) = (`Seq.spanl` xs) $ \case
119 XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True
121 let attrs' = (`Seq.filter` attrs) $ \case
122 XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts ->
124 XML.QName "" "xmlns" -> False
125 XML.QName ns _l -> ns /= XML.xmlns_xmlns
130 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
133 expected = XML.Tree0 (XML.notSourced $ XML.NodeAttr n)
134 check (XML.Tree (XML.unSourced -> XML.NodeAttr k)
135 v@(toList -> [XML.Tree0 (XML.unSourced -> XML.NodeText _v)])) | k == n =
138 any = P.label "any" $
139 P.token (const $ Just ()) Set.empty
140 anyElem ns p = P.label "anyElem" $ do
141 (n,ts) <- P.token check $ Set.singleton $ P.Tokens $ pure expected
142 p_XMLs (p $ XML.qNameLocal n) ts
144 expected = XML.Tree (XML.notSourced $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty
145 check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts)
146 | XML.qNameSpace e == ns
150 P.token check $ Set.singleton $ P.Tokens $ pure expected
152 expected = XML.Tree0 (XML.notSourced $ XML.NodeText $ XML.EscapedText mempty)
153 check (XML.Tree0 (XML.unSourced -> XML.NodeText t)) = Just t
155 optional = P.optional
159 fail = P.label "fail" $ P.failure Nothing mempty
160 type instance RNC.Permutation Parser = RNC.Perm Parser
161 instance RNC.Sym_Permutation Parser where
162 runPermutation (RNC.Perm value parser) = optional parser >>= f
164 -- NOTE: copy Control.Applicative.Permutations.runPermutation
165 -- to replace the commented empty below so that P.TrivialError
166 -- has the unexpected token.
167 f Nothing = maybe {-empty-}(P.token (const Nothing) Set.empty) pure value
168 f (Just p) = RNC.runPermutation p
169 toPermutation p = RNC.Perm Nothing $ pure <$> p
170 toPermutationWithDefault v p = RNC.Perm (Just v) $ pure <$> p
172 instance P.Stream XMLs where
173 type Token XMLs = XML
174 type Tokens XMLs = XMLs
178 t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts
179 | RNC.isIgnoredNode n -> P.take1_ ts
180 | otherwise -> Just (t, ts)
181 takeN_ n s | n <= 0 = Just (mempty, s)
184 let (ns,rs) = Seq.splitAt n s in
185 let (ko,ok) = Seq.partition (RNC.isIgnoredNode . XML.unSourced . XML.unTree) ns in
186 case P.takeN_ (Seq.length ko) rs of
187 Nothing -> Just (ok, rs)
188 Just (ns',rs') -> Just (ok<>ns', rs')
189 tokensToChunk _s = Seq.fromList
190 chunkToTokens _s = toList
191 chunkLength _s = Seq.length
192 takeWhile_ = Seq.spanl
193 -- | NOTE: unimplemented: useless since each 'XML.Node' is annotated with its 'FileSource'.
194 reachOffset = error "[BUG] P.Stream XMLs: reachOffset is not helpful, please use annotated source locations"
195 -- | NOTE: useless since each 'XML.Node' is annotated with its 'FileSource'.
196 reachOffsetNoLine = error "[BUG] P.Stream XMLs: reachOffsetNoLine is not helpful, please use annotated source locations"
197 showTokens _s toks = List.intercalate ", " $ toList $ showTree <$> toks
199 showTree :: XML -> String
200 showTree (Tree a _ts) =
201 showSourced a $ \case
202 XML.NodeAttr n -> show (remove_XMLNS_DTC n)<>"="
203 XML.NodeCDATA _t -> "cdata"
204 XML.NodeComment _c -> "comment"
205 XML.NodeElem n -> "<"<>show (remove_XMLNS_DTC n)<>">"
206 XML.NodePI n _t -> "processing-instruction"<>show n
207 XML.NodeText _t -> "text"
209 | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""}
212 showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f =
213 if null fileRange_file
215 else f a <> foldMap (\p -> "\n in "<>show p) path
217 -- | @p_XMLs p xs@ returns a parser parsing @xs@ entirely with @p@,
218 -- updating 'P.stateOffset' and re-raising any exception.
219 p_XMLs :: Parser a -> XMLs -> Parser a
220 p_XMLs p stateInput = do
222 st <- P.getParserState
223 let (st', res) = P.runParser' (S.runStateT (p <* P.eof) s) P.State
224 { P.stateInput = stateInput
225 , P.stateOffset = P.stateOffset st
226 , P.stateParseErrors = []
227 , P.statePosState = P.PosState
228 { P.pstateInput = stateInput
229 , P.pstateOffset = P.stateOffset st
230 , P.pstateSourcePos = P.pstateSourcePos $ P.statePosState st
231 , P.pstateTabWidth = P.pos1
232 , P.pstateLinePrefix = ""
235 P.updateParserState (\ps -> ps{P.stateOffset = P.stateOffset st'})
240 Left (P.ParseErrorBundle errs _) ->
241 case NonEmpty.head errs of
242 P.TrivialError _o us es -> P.failure us es
244 lift $ P.ParsecT $ \ps _cok cerr _eok _eerr ->
245 cerr (P.TrivialError o us es) ps
247 P.FancyError _o es -> P.fancyFailure es
249 p_element :: XML.QName -> Parser a -> Cell XMLs -> Parser a
250 p_element n p (Sourced state_locTCT ts) = do
251 let mayNameOrFigureName
252 | n == "aside" = Nothing
255 -- NOTE: special case renaming the current XML.Pos
256 -- using the @type attribute to have positions like this:
264 , Just ty <- getFirst $ (`foldMap` ts) $ \case
265 Tree (unSourced -> XML.NodeAttr "type") xs
266 | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs
267 , Just ty <- XML.ncName $ XML.unescapeText t
270 = Just $ XML.QName xmlns_dtc ty
272 case mayNameOrFigureName of
275 S.put st{state_locTCT}
279 Just nameOrFigureName -> do
280 st@State{state_posXML} <- S.get
281 let incrPrecedingSibling name =
282 maybe (Nat1 1) succNat1 $
284 XML.pos_precedingSiblings state_posXML
286 { state_posXML = state_posXML
287 -- NOTE: in children, push current name incremented on ancestors
288 -- and reset preceding siblings.
289 { XML.pos_precedingSiblings = mempty
290 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
291 , XML.pos_ancestorsWithFigureNames =
292 XML.pos_ancestorsWithFigureNames state_posXML |>
294 , incrPrecedingSibling nameOrFigureName )
300 { state_posXML = state_posXML
301 -- NOTE: after current, increment current name
302 -- and reset ancestors.
303 { XML.pos_precedingSiblings =
304 (if n == nameOrFigureName then id
305 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
306 Map.insertWith (const succNat1) n (Nat1 1) $
307 XML.pos_precedingSiblings state_posXML
312 instance RNC.Sym_RNC_Extra Parser where
313 none = RNC.rule "none" $ P.eof
317 Tree0 (unSourced -> XML.NodeComment c) :< ts -> do
320 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
321 EmptyL -> P.failure Nothing ex
323 ex = Set.singleton $ P.Tokens $ pure expected
324 expected = Tree0 (cell0 $ XML.NodeComment "")
325 bool = RNC.rule "bool" $ RNC.text >>= \t ->
327 "true" -> return True
328 "false" -> return False
329 _ -> P.fancyFailure $
330 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
331 int = RNC.rule "int" $ RNC.text >>= \t ->
332 case readMaybe (TL.unpack t) of
334 Nothing -> P.fancyFailure $
335 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
336 rational = RNC.rule "rational" $ RNC.text >>= \t ->
337 case readMaybe (TL.unpack t) of
338 Just (Rational i) | 0 <= i -> return i
339 | otherwise -> P.fancyFailure $
340 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
341 Nothing -> P.fancyFailure $
342 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
343 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
344 case readMaybe (TL.unpack t) of
345 Just (Rational i) | 0 <= i -> return i
346 | otherwise -> P.fancyFailure $
347 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
348 Nothing -> P.fancyFailure $
349 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
350 nat = RNC.rule "nat" $ RNC.int >>= \i ->
353 else P.fancyFailure $ Set.singleton $
354 P.ErrorCustom $ ErrorRead_Not_Nat i
355 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
358 else P.fancyFailure $ Set.singleton $
359 P.ErrorCustom $ ErrorRead_Not_Nat1 i
360 instance DTC.Sym_DTC Parser where
361 positionXML = S.gets state_posXML
362 locationTCT = S.gets state_locTCT
365 -- ** Type 'ErrorRead'
367 = ErrorRead_EndOfInput
368 | ErrorRead_Not_Bool TL.Text
369 | ErrorRead_Not_Int TL.Text
370 | ErrorRead_Not_Nat Int
371 | ErrorRead_Not_Nat1 Int
372 | ErrorRead_Not_Rational TL.Text
373 | ErrorRead_Not_Positive TL.Text
374 deriving (Eq,Ord,Show)
375 instance P.ShowErrorComponent ErrorRead where
376 showErrorComponent = show
378 -- ** Type 'Rational'
379 -- | Wrapper to change the 'Read' instance.
380 newtype Rational = Rational Ratio.Rational
381 instance Read Rational where
383 x <- Read.step readPrec
384 Read.expectP (Read.Symbol "/")
385 y <- Read.step readPrec
386 return $ Rational (x % y)