1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Textphile.DTC.Read.TCT where
10 import Control.Applicative (Applicative(..), optional)
11 import Control.Monad (Monad(..))
13 import Data.Default.Class (Default(..))
14 import Data.Either (Either(..))
15 import Data.Eq (Eq(..))
16 import Data.Foldable (Foldable(..), all)
17 import Data.Function (($), (.), const, id)
18 import Data.Functor ((<$>))
20 import Data.List.NonEmpty (NonEmpty(..))
21 import Data.Maybe (Maybe(..), maybe)
22 import Data.Monoid (Monoid(..), First(..))
23 import Data.Ord (Ord(..))
24 import Data.Ratio ((%))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence (ViewL(..), (|>))
27 import Data.String (String)
28 import Data.Tuple (fst, snd)
29 import Prelude (error)
30 import Text.Blaze.DTC (xmlns_dtc)
31 import Text.Read (readMaybe, Read(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.Char as Char
35 import qualified Data.List as List
36 import qualified Data.List.NonEmpty as NonEmpty
37 import qualified Data.Map.Strict as Map
38 import qualified Data.Ratio as Ratio
39 import qualified Data.Sequence as Seq
40 import qualified Data.Set as Set
41 import qualified Data.Text.Lazy as TL
42 import qualified GHC.Read as Read (expectP)
43 import qualified Symantic.RNC as RNC
44 import qualified Symantic.XML as XML
45 import qualified Text.Megaparsec as P
46 import qualified Text.Read as Read
48 import Textphile.TCT hiding (Parser, ErrorRead)
49 import Textphile.XML (XML, XMLs)
50 import Textphile.Utils (Nat(..), Nat1(..), succNat1)
51 import qualified Textphile.DTC.Document as DTC
52 import qualified Textphile.DTC.Sym as DTC
53 import qualified Textphile.RNC as RNC
54 import qualified Textphile.XML as XML
55 import qualified Textphile.TCT.Cell as TCT
60 Either (P.ParseErrorBundle XMLs ErrorRead) DTC.Document
61 readDTC stateInput = (fst <$>) $ snd $
62 P.runParser' (S.runStateT (DTC.document <* P.eof) (def::State)) P.State
65 , P.stateParseErrors = []
67 error "[BUG] validateXML: getSourcePos is not helpful here, please use annotated source locations"
68 -- NOTE: reporting the node number is less helpful
69 -- than the source text line and number where the node is;
70 -- P.statePosState is only used by P.getSourcePos.
75 { state_posXML :: XML.Pos
76 , state_locTCT :: TCT.Location
78 instance Default State where
85 type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
87 instance RNC.Sym_Rule Parser where
88 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
91 instance RNC.Sym_RNC Parser where
92 namespace _p _n = pure ()
94 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
97 expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty
98 check (XML.Tree (XML.Sourced src (XML.NodeElem e)) ts)
100 = Just $ XML.Sourced src $ removePI $ removeXMLNS $ removeSpaces ts
103 (`Seq.filter` xs) $ \case
104 XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False
107 if (`all` xs) $ \case
108 XML.Tree (XML.unSourced -> XML.NodeText (XML.EscapedText et)) _ts ->
110 XML.EscapedPlain t -> TL.all Char.isSpace t
113 then (`Seq.filter` xs) $ \case
114 XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False
118 let (attrs,rest) = (`Seq.spanl` xs) $ \case
119 XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True
121 let attrs' = (`Seq.filter` attrs) $ \case
122 XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts ->
124 XML.QName "" "xmlns" -> False
125 XML.QName ns _l -> ns /= XML.xmlns_xmlns
130 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
133 expected = XML.Tree0 (XML.notSourced $ XML.NodeAttr n)
134 check (XML.Tree (XML.unSourced -> XML.NodeAttr k)
135 v@(toList -> [XML.Tree0 (XML.unSourced -> XML.NodeText _v)])) | k == n =
138 any = P.label "any" $
139 P.token (const $ Just ()) Set.empty
140 anyElem ns p = P.label "anyElem" $ do
141 (n,ts) <- P.token check $ Set.singleton $ P.Tokens $ pure expected
142 p_XMLs (p $ XML.qNameLocal n) ts
144 expected = XML.Tree (XML.notSourced $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty
145 check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts)
146 | XML.qNameSpace e == ns
150 P.token check $ Set.singleton $ P.Tokens $ pure expected
152 expected = XML.Tree0 (XML.notSourced $ XML.NodeText $ XML.EscapedText mempty)
153 check (XML.Tree0 (XML.unSourced -> XML.NodeText t)) = Just t
155 optional = P.optional
159 fail = P.label "fail" $ P.failure Nothing mempty
160 type instance RNC.Permutation Parser = RNC.Perm Parser
161 instance RNC.Sym_Permutation Parser where
162 runPermutation (RNC.Perm value parser) = optional parser >>= f
164 -- NOTE: copy Control.Applicative.Permutations.runPermutation
165 -- to replace the commented empty below so that P.TrivialError
166 -- has the unexpected token.
167 f Nothing = maybe {-empty-}(P.token (const Nothing) Set.empty) pure value
168 f (Just p) = RNC.runPermutation p
169 toPermutation p = RNC.Perm Nothing $ pure <$> p
170 toPermutationWithDefault v p = RNC.Perm (Just v) $ pure <$> p
172 instance P.Stream XMLs where
173 type Token XMLs = XML
174 type Tokens XMLs = XMLs
178 t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts
179 | RNC.isIgnoredNode n -> P.take1_ ts
180 | otherwise -> Just (t, ts)
181 takeN_ n s | n <= 0 = Just (mempty, s)
184 let (ns,rs) = Seq.splitAt n s in
185 let (ko,ok) = Seq.partition (RNC.isIgnoredNode . XML.unSourced . XML.unTree) ns in
186 case P.takeN_ (Seq.length ko) rs of
187 Nothing -> Just (ok, rs)
188 Just (ns',rs') -> Just (ok<>ns', rs')
189 tokensToChunk _s = Seq.fromList
190 chunkToTokens _s = toList
191 chunkLength _s = Seq.length
192 takeWhile_ = Seq.spanl
193 -- | NOTE: unimplemented: useless since each 'XML.Node' is annotated with its 'FileSource'.
194 reachOffset = error "[BUG] P.Stream XMLs: reachOffset is not helpful, please use annotated source locations"
195 -- | NOTE: useless since each 'XML.Node' is annotated with its 'FileSource'.
196 reachOffsetNoLine = error "[BUG] P.Stream XMLs: reachOffsetNoLine is not helpful, please use annotated source locations"
197 showTokens _s toks = List.intercalate ", " $ toList $ showTree <$> toks
199 showTree :: XML -> String
200 showTree (Tree a _ts) =
201 showSourced a $ \case
202 XML.NodeAttr n -> show (remove_XMLNS_DTC n)<>"="
203 XML.NodeCDATA _t -> "cdata"
204 XML.NodeComment _c -> "comment"
205 XML.NodeElem n -> "<"<>show (remove_XMLNS_DTC n)<>">"
206 XML.NodePI n _t -> "processing-instruction"<>show n
207 XML.NodeText _t -> "text"
209 | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""}
212 showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f =
213 if null fileRange_file
215 else f a <> foldMap (\p -> "\n in "<>show p) path
217 -- | @p_XMLs p xs@ returns a parser parsing @xs@ entirely with @p@,
218 -- updating 'P.stateOffset' and re-raising any exception.
219 p_XMLs :: Parser a -> XMLs -> Parser a
220 p_XMLs p stateInput = do
222 st <- P.getParserState
223 let (st', res) = P.runParser' (S.runStateT (p <* P.eof) s) P.State
224 { P.stateInput = stateInput
225 , P.stateOffset = P.stateOffset st
226 , P.statePosState = P.PosState
227 { P.pstateInput = stateInput
228 , P.pstateOffset = P.stateOffset st
229 , P.pstateSourcePos = P.pstateSourcePos $ P.statePosState st
230 , P.pstateTabWidth = P.pos1
231 , P.pstateLinePrefix = ""
234 P.updateParserState (\ps -> ps{P.stateOffset = P.stateOffset st'})
239 Left (P.ParseErrorBundle errs _) ->
240 case NonEmpty.head errs of
241 P.TrivialError _o us es -> P.failure us es
243 lift $ P.ParsecT $ \ps _cok cerr _eok _eerr ->
244 cerr (P.TrivialError o us es) ps
246 P.FancyError _o es -> P.fancyFailure es
248 p_element :: XML.QName -> Parser a -> Cell XMLs -> Parser a
249 p_element n p (Sourced state_locTCT ts) = do
250 let mayNameOrFigureName
251 | n == "aside" = Nothing
254 -- NOTE: special case renaming the current XML.Pos
255 -- using the @type attribute to have positions like this:
263 , Just ty <- getFirst $ (`foldMap` ts) $ \case
264 Tree (unSourced -> XML.NodeAttr "type") xs
265 | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs
266 , Just ty <- XML.ncName $ XML.unescapeText t
269 = Just $ XML.QName xmlns_dtc ty
271 case mayNameOrFigureName of
274 S.put st{state_locTCT}
278 Just nameOrFigureName -> do
279 st@State{state_posXML} <- S.get
280 let incrPrecedingSibling name =
281 maybe (Nat1 1) succNat1 $
283 XML.pos_precedingSiblings state_posXML
285 { state_posXML = state_posXML
286 -- NOTE: in children, push current name incremented on ancestors
287 -- and reset preceding siblings.
288 { XML.pos_precedingSiblings = mempty
289 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
290 , XML.pos_ancestorsWithFigureNames =
291 XML.pos_ancestorsWithFigureNames state_posXML |>
293 , incrPrecedingSibling nameOrFigureName )
299 { state_posXML = state_posXML
300 -- NOTE: after current, increment current name
301 -- and reset ancestors.
302 { XML.pos_precedingSiblings =
303 (if n == nameOrFigureName then id
304 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
305 Map.insertWith (const succNat1) n (Nat1 1) $
306 XML.pos_precedingSiblings state_posXML
311 instance RNC.Sym_RNC_Extra Parser where
312 none = RNC.rule "none" $ P.eof
316 Tree0 (unSourced -> XML.NodeComment c) :< ts -> do
319 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
320 EmptyL -> P.failure Nothing ex
322 ex = Set.singleton $ P.Tokens $ pure expected
323 expected = Tree0 (cell0 $ XML.NodeComment "")
324 bool = RNC.rule "bool" $ RNC.text >>= \t ->
326 "true" -> return True
327 "false" -> return False
328 _ -> P.fancyFailure $
329 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
330 int = RNC.rule "int" $ RNC.text >>= \t ->
331 case readMaybe (TL.unpack t) of
333 Nothing -> P.fancyFailure $
334 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
335 rational = RNC.rule "rational" $ RNC.text >>= \t ->
336 case readMaybe (TL.unpack t) of
337 Just (Rational i) | 0 <= i -> return i
338 | otherwise -> P.fancyFailure $
339 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
340 Nothing -> P.fancyFailure $
341 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
342 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
343 case readMaybe (TL.unpack t) of
344 Just (Rational i) | 0 <= i -> return i
345 | otherwise -> P.fancyFailure $
346 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
347 Nothing -> P.fancyFailure $
348 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
349 nat = RNC.rule "nat" $ RNC.int >>= \i ->
352 else P.fancyFailure $ Set.singleton $
353 P.ErrorCustom $ ErrorRead_Not_Nat i
354 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
357 else P.fancyFailure $ Set.singleton $
358 P.ErrorCustom $ ErrorRead_Not_Nat1 i
359 instance DTC.Sym_DTC Parser where
360 positionXML = S.gets state_posXML
361 locationTCT = S.gets state_locTCT
364 -- ** Type 'ErrorRead'
366 = ErrorRead_EndOfInput
367 | ErrorRead_Not_Bool TL.Text
368 | ErrorRead_Not_Int TL.Text
369 | ErrorRead_Not_Nat Int
370 | ErrorRead_Not_Nat1 Int
371 | ErrorRead_Not_Rational TL.Text
372 | ErrorRead_Not_Positive TL.Text
373 deriving (Eq,Ord,Show)
374 instance P.ShowErrorComponent ErrorRead where
375 showErrorComponent = show
377 -- ** Type 'Rational'
378 -- | Wrapper to change the 'Read' instance.
379 newtype Rational = Rational Ratio.Rational
380 instance Read Rational where
382 x <- Read.step readPrec
383 Read.expectP (Read.Symbol "/")
384 y <- Read.step readPrec
385 return $ Rational (x % y)