]> Git — Sourcephile - doclang.git/blob - src/Textphile/DTC/Read/TCT.hs
Fix megaparsec-8 update
[doclang.git] / src / Textphile / DTC / Read / TCT.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Textphile.DTC.Read.TCT where
10 import Control.Applicative (Applicative(..), optional)
11 import Control.Monad (Monad(..))
12 import Data.Bool
13 import Data.Default.Class (Default(..))
14 import Data.Either (Either(..))
15 import Data.Eq (Eq(..))
16 import Data.Foldable (Foldable(..), all)
17 import Data.Function (($), (.), const, id)
18 import Data.Functor ((<$>))
19 import Data.Int (Int)
20 import Data.List.NonEmpty (NonEmpty(..))
21 import Data.Maybe (Maybe(..), maybe)
22 import Data.Monoid (Monoid(..), First(..))
23 import Data.Ord (Ord(..))
24 import Data.Ratio ((%))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence (ViewL(..), (|>))
27 import Data.String (String)
28 import Data.Tuple (fst, snd)
29 import Prelude (error)
30 import Text.Blaze.DTC (xmlns_dtc)
31 import Text.Read (readMaybe, Read(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.Char as Char
35 import qualified Data.List as List
36 import qualified Data.List.NonEmpty as NonEmpty
37 import qualified Data.Map.Strict as Map
38 import qualified Data.Ratio as Ratio
39 import qualified Data.Sequence as Seq
40 import qualified Data.Set as Set
41 import qualified Data.Text.Lazy as TL
42 import qualified GHC.Read as Read (expectP)
43 import qualified Symantic.RNC as RNC
44 import qualified Symantic.XML as XML
45 import qualified Text.Megaparsec as P
46 import qualified Text.Read as Read
47
48 import Textphile.TCT hiding (Parser, ErrorRead)
49 import Textphile.XML (XML, XMLs)
50 import Textphile.Utils (Nat(..), Nat1(..), succNat1)
51 import qualified Textphile.DTC.Document as DTC
52 import qualified Textphile.DTC.Sym as DTC
53 import qualified Textphile.RNC as RNC
54 import qualified Textphile.XML as XML
55 import qualified Textphile.TCT.Cell as TCT
56
57 readDTC ::
58 DTC.Sym_DTC Parser =>
59 XMLs ->
60 Either (P.ParseErrorBundle XMLs ErrorRead) DTC.Document
61 readDTC stateInput = (fst <$>) $ snd $
62 P.runParser' (S.runStateT (DTC.document <* P.eof) (def::State)) P.State
63 { P.stateInput
64 , P.stateOffset = 0
65 , P.stateParseErrors = []
66 , P.statePosState =
67 error "[BUG] validateXML: getSourcePos is not helpful here, please use annotated source locations"
68 -- NOTE: reporting the node number is less helpful
69 -- than the source text line and number where the node is;
70 -- P.statePosState is only used by P.getSourcePos.
71 }
72
73 -- * Type 'State'
74 data State = State
75 { state_posXML :: XML.Pos
76 , state_locTCT :: TCT.Location
77 } deriving (Eq,Show)
78 instance Default State where
79 def = State
80 { state_posXML = def
81 , state_locTCT = def
82 }
83
84 -- * Type 'Parser'
85 type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
86
87 instance RNC.Sym_Rule Parser where
88 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
89 rule _n = id
90 arg _n = pure ()
91 instance RNC.Sym_RNC Parser where
92 namespace _p _n = pure ()
93 element n p = do
94 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
95 p_element n p ts
96 where
97 expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty
98 check (XML.Tree (XML.Sourced src (XML.NodeElem e)) ts)
99 | e == n
100 = Just $ XML.Sourced src $ removePI $ removeXMLNS $ removeSpaces ts
101 where
102 removePI xs =
103 (`Seq.filter` xs) $ \case
104 XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False
105 _ -> True
106 removeSpaces xs =
107 if (`all` xs) $ \case
108 XML.Tree (XML.unSourced -> XML.NodeText (XML.EscapedText et)) _ts ->
109 all (\case
110 XML.EscapedPlain t -> TL.all Char.isSpace t
111 _ -> False) et
112 _ -> True
113 then (`Seq.filter` xs) $ \case
114 XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False
115 _ -> True
116 else xs
117 removeXMLNS xs =
118 let (attrs,rest) = (`Seq.spanl` xs) $ \case
119 XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True
120 _ -> False in
121 let attrs' = (`Seq.filter` attrs) $ \case
122 XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts ->
123 case a of
124 XML.QName "" "xmlns" -> False
125 XML.QName ns _l -> ns /= XML.xmlns_xmlns
126 _ -> True in
127 attrs' <> rest
128 check _t = Nothing
129 attribute n p = do
130 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
131 p_XMLs p ts
132 where
133 expected = XML.Tree0 (XML.notSourced $ XML.NodeAttr n)
134 check (XML.Tree (XML.unSourced -> XML.NodeAttr k)
135 v@(toList -> [XML.Tree0 (XML.unSourced -> XML.NodeText _v)])) | k == n =
136 Just v
137 check _t = Nothing
138 any = P.label "any" $
139 P.token (const $ Just ()) Set.empty
140 anyElem ns p = P.label "anyElem" $ do
141 (n,ts) <- P.token check $ Set.singleton $ P.Tokens $ pure expected
142 p_XMLs (p $ XML.qNameLocal n) ts
143 where
144 expected = XML.Tree (XML.notSourced $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty
145 check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts)
146 | XML.qNameSpace e == ns
147 = Just $ (e,ts)
148 check _t = Nothing
149 escapedText = do
150 P.token check $ Set.singleton $ P.Tokens $ pure expected
151 where
152 expected = XML.Tree0 (XML.notSourced $ XML.NodeText $ XML.EscapedText mempty)
153 check (XML.Tree0 (XML.unSourced -> XML.NodeText t)) = Just t
154 check _t = Nothing
155 optional = P.optional
156 option = P.option
157 choice = P.choice
158 try = P.try
159 fail = P.label "fail" $ P.failure Nothing mempty
160 type instance RNC.Permutation Parser = RNC.Perm Parser
161 instance RNC.Sym_Permutation Parser where
162 runPermutation (RNC.Perm value parser) = optional parser >>= f
163 where
164 -- NOTE: copy Control.Applicative.Permutations.runPermutation
165 -- to replace the commented empty below so that P.TrivialError
166 -- has the unexpected token.
167 f Nothing = maybe {-empty-}(P.token (const Nothing) Set.empty) pure value
168 f (Just p) = RNC.runPermutation p
169 toPermutation p = RNC.Perm Nothing $ pure <$> p
170 toPermutationWithDefault v p = RNC.Perm (Just v) $ pure <$> p
171
172 instance P.Stream XMLs where
173 type Token XMLs = XML
174 type Tokens XMLs = XMLs
175 take1_ s =
176 case Seq.viewl s of
177 EmptyL -> Nothing
178 t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts
179 | RNC.isIgnoredNode n -> P.take1_ ts
180 | otherwise -> Just (t, ts)
181 takeN_ n s | n <= 0 = Just (mempty, s)
182 | null s = Nothing
183 | otherwise =
184 let (ns,rs) = Seq.splitAt n s in
185 let (ko,ok) = Seq.partition (RNC.isIgnoredNode . XML.unSourced . XML.unTree) ns in
186 case P.takeN_ (Seq.length ko) rs of
187 Nothing -> Just (ok, rs)
188 Just (ns',rs') -> Just (ok<>ns', rs')
189 tokensToChunk _s = Seq.fromList
190 chunkToTokens _s = toList
191 chunkLength _s = Seq.length
192 takeWhile_ = Seq.spanl
193 -- | NOTE: unimplemented: useless since each 'XML.Node' is annotated with its 'FileSource'.
194 reachOffset = error "[BUG] P.Stream XMLs: reachOffset is not helpful, please use annotated source locations"
195 -- | NOTE: useless since each 'XML.Node' is annotated with its 'FileSource'.
196 reachOffsetNoLine = error "[BUG] P.Stream XMLs: reachOffsetNoLine is not helpful, please use annotated source locations"
197 showTokens _s toks = List.intercalate ", " $ toList $ showTree <$> toks
198 where
199 showTree :: XML -> String
200 showTree (Tree a _ts) =
201 showSourced a $ \case
202 XML.NodeAttr n -> show (remove_XMLNS_DTC n)<>"="
203 XML.NodeCDATA _t -> "cdata"
204 XML.NodeComment _c -> "comment"
205 XML.NodeElem n -> "<"<>show (remove_XMLNS_DTC n)<>">"
206 XML.NodePI n _t -> "processing-instruction"<>show n
207 XML.NodeText _t -> "text"
208 remove_XMLNS_DTC n
209 | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""}
210 | otherwise = n
211
212 showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f =
213 if null fileRange_file
214 then f a
215 else f a <> foldMap (\p -> "\n in "<>show p) path
216
217 -- | @p_XMLs p xs@ returns a parser parsing @xs@ entirely with @p@,
218 -- updating 'P.stateOffset' and re-raising any exception.
219 p_XMLs :: Parser a -> XMLs -> Parser a
220 p_XMLs p stateInput = do
221 s <- S.get
222 st <- P.getParserState
223 let (st', res) = P.runParser' (S.runStateT (p <* P.eof) s) P.State
224 { P.stateInput = stateInput
225 , P.stateOffset = P.stateOffset st
226 , P.stateParseErrors = []
227 , P.statePosState = P.PosState
228 { P.pstateInput = stateInput
229 , P.pstateOffset = P.stateOffset st
230 , P.pstateSourcePos = P.pstateSourcePos $ P.statePosState st
231 , P.pstateTabWidth = P.pos1
232 , P.pstateLinePrefix = ""
233 }
234 }
235 P.updateParserState (\ps -> ps{P.stateOffset = P.stateOffset st'})
236 case res of
237 Right (a, s') -> do
238 S.put s'
239 return a
240 Left (P.ParseErrorBundle errs _) ->
241 case NonEmpty.head errs of
242 P.TrivialError _o us es -> P.failure us es
243 {-
244 lift $ P.ParsecT $ \ps _cok cerr _eok _eerr ->
245 cerr (P.TrivialError o us es) ps
246 -}
247 P.FancyError _o es -> P.fancyFailure es
248
249 p_element :: XML.QName -> Parser a -> Cell XMLs -> Parser a
250 p_element n p (Sourced state_locTCT ts) = do
251 let mayNameOrFigureName
252 | n == "aside" = Nothing
253 -- NOTE: skip aside.
254 | n == "figure"
255 -- NOTE: special case renaming the current XML.Pos
256 -- using the @type attribute to have positions like this:
257 -- section1.Quote1
258 -- section1.Example1
259 -- section1.Quote2
260 -- instead of:
261 -- section1.figure1
262 -- section1.figure2
263 -- section1.figure3
264 , Just ty <- getFirst $ (`foldMap` ts) $ \case
265 Tree (unSourced -> XML.NodeAttr "type") xs
266 | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs
267 , Just ty <- XML.ncName $ XML.unescapeText t
268 -> First $ Just ty
269 _ -> First Nothing
270 = Just $ XML.QName xmlns_dtc ty
271 | otherwise = Just n
272 case mayNameOrFigureName of
273 Nothing -> do
274 st <- S.get
275 S.put st{state_locTCT}
276 res <- p_XMLs p ts
277 S.put st
278 return res
279 Just nameOrFigureName -> do
280 st@State{state_posXML} <- S.get
281 let incrPrecedingSibling name =
282 maybe (Nat1 1) succNat1 $
283 Map.lookup name $
284 XML.pos_precedingSiblings state_posXML
285 S.put State
286 { state_posXML = state_posXML
287 -- NOTE: in children, push current name incremented on ancestors
288 -- and reset preceding siblings.
289 { XML.pos_precedingSiblings = mempty
290 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
291 , XML.pos_ancestorsWithFigureNames =
292 XML.pos_ancestorsWithFigureNames state_posXML |>
293 ( nameOrFigureName
294 , incrPrecedingSibling nameOrFigureName )
295 }
296 , state_locTCT
297 }
298 res <- p_XMLs p ts
299 S.put st
300 { state_posXML = state_posXML
301 -- NOTE: after current, increment current name
302 -- and reset ancestors.
303 { XML.pos_precedingSiblings =
304 (if n == nameOrFigureName then id
305 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
306 Map.insertWith (const succNat1) n (Nat1 1) $
307 XML.pos_precedingSiblings state_posXML
308 }
309 }
310 return res
311
312 instance RNC.Sym_RNC_Extra Parser where
313 none = RNC.rule "none" $ P.eof
314 comment = do
315 s <- P.getInput
316 case Seq.viewl s of
317 Tree0 (unSourced -> XML.NodeComment c) :< ts -> do
318 P.setInput ts
319 return c
320 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
321 EmptyL -> P.failure Nothing ex
322 where
323 ex = Set.singleton $ P.Tokens $ pure expected
324 expected = Tree0 (cell0 $ XML.NodeComment "")
325 bool = RNC.rule "bool" $ RNC.text >>= \t ->
326 case t of
327 "true" -> return True
328 "false" -> return False
329 _ -> P.fancyFailure $
330 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
331 int = RNC.rule "int" $ RNC.text >>= \t ->
332 case readMaybe (TL.unpack t) of
333 Just i -> return i
334 Nothing -> P.fancyFailure $
335 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
336 rational = RNC.rule "rational" $ RNC.text >>= \t ->
337 case readMaybe (TL.unpack t) of
338 Just (Rational i) | 0 <= i -> return i
339 | otherwise -> P.fancyFailure $
340 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
341 Nothing -> P.fancyFailure $
342 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
343 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
344 case readMaybe (TL.unpack t) of
345 Just (Rational i) | 0 <= i -> return i
346 | otherwise -> P.fancyFailure $
347 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
348 Nothing -> P.fancyFailure $
349 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
350 nat = RNC.rule "nat" $ RNC.int >>= \i ->
351 if i >= 0
352 then return $ Nat i
353 else P.fancyFailure $ Set.singleton $
354 P.ErrorCustom $ ErrorRead_Not_Nat i
355 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
356 if i > 0
357 then return $ Nat1 i
358 else P.fancyFailure $ Set.singleton $
359 P.ErrorCustom $ ErrorRead_Not_Nat1 i
360 instance DTC.Sym_DTC Parser where
361 positionXML = S.gets state_posXML
362 locationTCT = S.gets state_locTCT
363
364
365 -- ** Type 'ErrorRead'
366 data ErrorRead
367 = ErrorRead_EndOfInput
368 | ErrorRead_Not_Bool TL.Text
369 | ErrorRead_Not_Int TL.Text
370 | ErrorRead_Not_Nat Int
371 | ErrorRead_Not_Nat1 Int
372 | ErrorRead_Not_Rational TL.Text
373 | ErrorRead_Not_Positive TL.Text
374 deriving (Eq,Ord,Show)
375 instance P.ShowErrorComponent ErrorRead where
376 showErrorComponent = show
377
378 -- ** Type 'Rational'
379 -- | Wrapper to change the 'Read' instance.
380 newtype Rational = Rational Ratio.Rational
381 instance Read Rational where
382 readPrec = do
383 x <- Read.step readPrec
384 Read.expectP (Read.Symbol "/")
385 y <- Read.step readPrec
386 return $ Rational (x % y)