]> Git — Sourcephile - doclang.git/blob - Hdoc/DTC/Read/TCT.hs
Update to megaparsec-7 and new symantic-xml
[doclang.git] / Hdoc / DTC / Read / TCT.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Hdoc.DTC.Read.TCT where
10 import Control.Applicative (Applicative(..), optional)
11 import Control.Monad (Monad(..))
12 import Data.Bool
13 import Data.Default.Class (Default(..))
14 import Data.Either (Either(..))
15 import Data.Eq (Eq(..))
16 import Data.Foldable (Foldable(..), all)
17 import Data.Function (($), (.), const, id)
18 import Data.Functor ((<$>))
19 import Data.Int (Int)
20 import Data.List.NonEmpty (NonEmpty(..))
21 import Data.Maybe (Maybe(..), maybe)
22 import Data.Monoid (Monoid(..), First(..))
23 import Data.Ord (Ord(..))
24 import Data.Ratio ((%))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence (ViewL(..), (|>))
27 import Data.String (String)
28 import Data.Tuple (fst, snd)
29 import Prelude (error)
30 import Text.Blaze.DTC (xmlns_dtc)
31 import Text.Read (readMaybe, Read(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.Char as Char
35 import qualified Data.List as List
36 import qualified Data.List.NonEmpty as NonEmpty
37 import qualified Data.Map.Strict as Map
38 import qualified Data.Ratio as Ratio
39 import qualified Data.Sequence as Seq
40 import qualified Data.Set as Set
41 import qualified Data.Text.Lazy as TL
42 import qualified GHC.Read as Read (expectP)
43 import qualified Language.Symantic.RNC as RNC
44 import qualified Language.Symantic.XML as XML
45 import qualified Text.Megaparsec as P
46 import qualified Text.Read as Read
47
48 import Hdoc.TCT hiding (Parser, ErrorRead)
49 import Hdoc.XML (XML, XMLs)
50 import Hdoc.Utils (Nat(..), Nat1(..), succNat1)
51 import qualified Hdoc.DTC.Document as DTC
52 import qualified Hdoc.DTC.Sym as DTC
53 import qualified Hdoc.RNC as RNC
54 import qualified Hdoc.XML as XML
55 import qualified Hdoc.TCT.Cell as TCT
56
57 readDTC ::
58 DTC.Sym_DTC Parser =>
59 XMLs ->
60 Either (P.ParseErrorBundle XMLs ErrorRead) DTC.Document
61 readDTC stateInput = (fst <$>) $ snd $
62 P.runParser' (S.runStateT (DTC.document <* P.eof) (def::State)) P.State
63 { P.stateInput
64 , P.stateOffset = 0
65 , P.statePosState =
66 error "[BUG] validateXML: getSourcePos is not helpful here, please use annotated source locations"
67 -- NOTE: reporting the node number is less helpful
68 -- than the source text line and number where the node is;
69 -- P.statePosState is only used by P.getSourcePos.
70 }
71
72 -- * Type 'State'
73 data State = State
74 { state_posXML :: XML.Pos
75 , state_locTCT :: TCT.Location
76 } deriving (Eq,Show)
77 instance Default State where
78 def = State
79 { state_posXML = def
80 , state_locTCT = def
81 }
82
83 -- * Type 'Parser'
84 type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
85
86 instance RNC.Sym_Rule Parser where
87 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
88 rule _n = id
89 arg _n = pure ()
90 instance RNC.Sym_RNC Parser where
91 namespace _p _n = pure ()
92 element n p = do
93 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
94 p_element n p ts
95 where
96 expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty
97 check (XML.Tree (XML.Sourced src (XML.NodeElem e)) ts)
98 | e == n
99 = Just $ XML.Sourced src $ removePI $ removeXMLNS $ removeSpaces ts
100 where
101 removePI xs =
102 (`Seq.filter` xs) $ \case
103 XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False
104 _ -> True
105 removeSpaces xs =
106 if (`all` xs) $ \case
107 XML.Tree (XML.unSourced -> XML.NodeText (XML.EscapedText et)) _ts ->
108 all (\case
109 XML.EscapedPlain t -> TL.all Char.isSpace t
110 _ -> False) et
111 _ -> True
112 then (`Seq.filter` xs) $ \case
113 XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False
114 _ -> True
115 else xs
116 removeXMLNS xs =
117 let (attrs,rest) = (`Seq.spanl` xs) $ \case
118 XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True
119 _ -> False in
120 let attrs' = (`Seq.filter` attrs) $ \case
121 XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts ->
122 case a of
123 XML.QName "" "xmlns" -> False
124 XML.QName ns _l -> ns /= XML.xmlns_xmlns
125 _ -> True in
126 attrs' <> rest
127 check _t = Nothing
128 attribute n p = do
129 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
130 p_XMLs p ts
131 where
132 expected = XML.Tree0 (XML.notSourced $ XML.NodeAttr n)
133 check (XML.Tree (XML.unSourced -> XML.NodeAttr k)
134 v@(toList -> [XML.Tree0 (XML.unSourced -> XML.NodeText _v)])) | k == n =
135 Just v
136 check _t = Nothing
137 any = P.label "any" $
138 P.token (const $ Just ()) Set.empty
139 anyElem ns p = P.label "anyElem" $ do
140 (n,ts) <- P.token check $ Set.singleton $ P.Tokens $ pure expected
141 p_XMLs (p $ XML.qNameLocal n) ts
142 where
143 expected = XML.Tree (XML.notSourced $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty
144 check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts)
145 | XML.qNameSpace e == ns
146 = Just $ (e,ts)
147 check _t = Nothing
148 escapedText = do
149 P.token check $ Set.singleton $ P.Tokens $ pure expected
150 where
151 expected = XML.Tree0 (XML.notSourced $ XML.NodeText $ XML.EscapedText mempty)
152 check (XML.Tree0 (XML.unSourced -> XML.NodeText t)) = Just t
153 check _t = Nothing
154 optional = P.optional
155 option = P.option
156 choice = P.choice
157 try = P.try
158 fail = P.label "fail" $ P.failure Nothing mempty
159 type instance RNC.Permutation Parser = RNC.Perm Parser
160 instance RNC.Sym_Permutation Parser where
161 runPermutation (RNC.Perm value parser) = optional parser >>= f
162 where
163 -- NOTE: copy Control.Applicative.Permutations.runPermutation
164 -- to replace the commented empty below so that P.TrivialError
165 -- has the unexpected token.
166 f Nothing = maybe {-empty-}(P.token (const Nothing) Set.empty) pure value
167 f (Just p) = RNC.runPermutation p
168 toPermutation p = RNC.Perm Nothing $ pure <$> p
169 toPermutationWithDefault v p = RNC.Perm (Just v) $ pure <$> p
170
171 instance P.Stream XMLs where
172 type Token XMLs = XML
173 type Tokens XMLs = XMLs
174 take1_ s =
175 case Seq.viewl s of
176 EmptyL -> Nothing
177 t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts
178 | RNC.isIgnoredNode n -> P.take1_ ts
179 | otherwise -> Just (t, ts)
180 takeN_ n s | n <= 0 = Just (mempty, s)
181 | null s = Nothing
182 | otherwise =
183 let (ns,rs) = Seq.splitAt n s in
184 let (ko,ok) = Seq.partition (RNC.isIgnoredNode . XML.unSourced . XML.unTree) ns in
185 case P.takeN_ (Seq.length ko) rs of
186 Nothing -> Just (ok, rs)
187 Just (ns',rs') -> Just (ok<>ns', rs')
188 tokensToChunk _s = Seq.fromList
189 chunkToTokens _s = toList
190 chunkLength _s = Seq.length
191 takeWhile_ = Seq.spanl
192 -- | NOTE: unimplemented: useless since each 'XML.Node' is annotated with its 'FileSource'.
193 reachOffset = error "[BUG] P.Stream XMLs: reachOffset is not helpful, please use annotated source locations"
194 -- | NOTE: useless since each 'XML.Node' is annotated with its 'FileSource'.
195 reachOffsetNoLine = error "[BUG] P.Stream XMLs: reachOffsetNoLine is not helpful, please use annotated source locations"
196 showTokens _s toks = List.intercalate ", " $ toList $ showTree <$> toks
197 where
198 showTree :: XML -> String
199 showTree (Tree a _ts) =
200 showSourced a $ \case
201 XML.NodeAttr n -> show (remove_XMLNS_DTC n)<>"="
202 XML.NodeCDATA _t -> "cdata"
203 XML.NodeComment _c -> "comment"
204 XML.NodeElem n -> "<"<>show (remove_XMLNS_DTC n)<>">"
205 XML.NodePI n _t -> "processing-instruction"<>show n
206 XML.NodeText _t -> "text"
207 remove_XMLNS_DTC n
208 | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""}
209 | otherwise = n
210
211 showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f =
212 if null fileRange_file
213 then f a
214 else f a <> foldMap (\p -> "\n in "<>show p) path
215
216 -- | @p_XMLs p xs@ returns a parser parsing @xs@ entirely with @p@,
217 -- updating 'P.stateOffset' and re-raising any exception.
218 p_XMLs :: Parser a -> XMLs -> Parser a
219 p_XMLs p stateInput = do
220 s <- S.get
221 st <- P.getParserState
222 let (st', res) = P.runParser' (S.runStateT (p <* P.eof) s) P.State
223 { P.stateInput = stateInput
224 , P.stateOffset = P.stateOffset st
225 , P.statePosState = P.PosState
226 { P.pstateInput = stateInput
227 , P.pstateOffset = P.stateOffset st
228 , P.pstateSourcePos = P.pstateSourcePos $ P.statePosState st
229 , P.pstateTabWidth = P.pos1
230 , P.pstateLinePrefix = ""
231 }
232 }
233 P.updateParserState (\ps -> ps{P.stateOffset = P.stateOffset st'})
234 case res of
235 Right (a, s') -> do
236 S.put s'
237 return a
238 Left (P.ParseErrorBundle errs _) ->
239 case NonEmpty.head errs of
240 P.TrivialError _o us es -> P.failure us es
241 {-
242 lift $ P.ParsecT $ \ps _cok cerr _eok _eerr ->
243 cerr (P.TrivialError o us es) ps
244 -}
245 P.FancyError _o es -> P.fancyFailure es
246
247 p_element :: XML.QName -> Parser a -> Cell XMLs -> Parser a
248 p_element n p (Sourced state_locTCT ts) = do
249 let mayNameOrFigureName
250 | n == "aside" = Nothing
251 -- NOTE: skip aside.
252 | n == "figure"
253 -- NOTE: special case renaming the current XML.Pos
254 -- using the @type attribute to have positions like this:
255 -- section1.Quote1
256 -- section1.Example1
257 -- section1.Quote2
258 -- instead of:
259 -- section1.figure1
260 -- section1.figure2
261 -- section1.figure3
262 , Just ty <- getFirst $ (`foldMap` ts) $ \case
263 Tree (unSourced -> XML.NodeAttr "type") xs
264 | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs
265 , Just ty <- XML.ncName $ XML.unescapeText t
266 -> First $ Just ty
267 _ -> First Nothing
268 = Just $ XML.QName xmlns_dtc ty
269 | otherwise = Just n
270 case mayNameOrFigureName of
271 Nothing -> do
272 st <- S.get
273 S.put st{state_locTCT}
274 res <- p_XMLs p ts
275 S.put st
276 return res
277 Just nameOrFigureName -> do
278 st@State{state_posXML} <- S.get
279 let incrPrecedingSibling name =
280 maybe (Nat1 1) succNat1 $
281 Map.lookup name $
282 XML.pos_precedingSiblings state_posXML
283 S.put State
284 { state_posXML = state_posXML
285 -- NOTE: in children, push current name incremented on ancestors
286 -- and reset preceding siblings.
287 { XML.pos_precedingSiblings = mempty
288 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
289 , XML.pos_ancestorsWithFigureNames =
290 XML.pos_ancestorsWithFigureNames state_posXML |>
291 ( nameOrFigureName
292 , incrPrecedingSibling nameOrFigureName )
293 }
294 , state_locTCT
295 }
296 res <- p_XMLs p ts
297 S.put st
298 { state_posXML = state_posXML
299 -- NOTE: after current, increment current name
300 -- and reset ancestors.
301 { XML.pos_precedingSiblings =
302 (if n == nameOrFigureName then id
303 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
304 Map.insertWith (const succNat1) n (Nat1 1) $
305 XML.pos_precedingSiblings state_posXML
306 }
307 }
308 return res
309
310 instance RNC.Sym_RNC_Extra Parser where
311 none = RNC.rule "none" $ P.eof
312 comment = do
313 s <- P.getInput
314 case Seq.viewl s of
315 Tree0 (unSourced -> XML.NodeComment c) :< ts -> do
316 P.setInput ts
317 return c
318 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
319 EmptyL -> P.failure Nothing ex
320 where
321 ex = Set.singleton $ P.Tokens $ pure expected
322 expected = Tree0 (cell0 $ XML.NodeComment "")
323 bool = RNC.rule "bool" $ RNC.text >>= \t ->
324 case t of
325 "true" -> return True
326 "false" -> return False
327 _ -> P.fancyFailure $
328 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
329 int = RNC.rule "int" $ RNC.text >>= \t ->
330 case readMaybe (TL.unpack t) of
331 Just i -> return i
332 Nothing -> P.fancyFailure $
333 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
334 rational = RNC.rule "rational" $ RNC.text >>= \t ->
335 case readMaybe (TL.unpack t) of
336 Just (Rational i) | 0 <= i -> return i
337 | otherwise -> P.fancyFailure $
338 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
339 Nothing -> P.fancyFailure $
340 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
341 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
342 case readMaybe (TL.unpack t) of
343 Just (Rational i) | 0 <= i -> return i
344 | otherwise -> P.fancyFailure $
345 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
346 Nothing -> P.fancyFailure $
347 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
348 nat = RNC.rule "nat" $ RNC.int >>= \i ->
349 if i >= 0
350 then return $ Nat i
351 else P.fancyFailure $ Set.singleton $
352 P.ErrorCustom $ ErrorRead_Not_Nat i
353 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
354 if i > 0
355 then return $ Nat1 i
356 else P.fancyFailure $ Set.singleton $
357 P.ErrorCustom $ ErrorRead_Not_Nat1 i
358 instance DTC.Sym_DTC Parser where
359 positionXML = S.gets state_posXML
360 locationTCT = S.gets state_locTCT
361
362
363 -- ** Type 'ErrorRead'
364 data ErrorRead
365 = ErrorRead_EndOfInput
366 | ErrorRead_Not_Bool TL.Text
367 | ErrorRead_Not_Int TL.Text
368 | ErrorRead_Not_Nat Int
369 | ErrorRead_Not_Nat1 Int
370 | ErrorRead_Not_Rational TL.Text
371 | ErrorRead_Not_Positive TL.Text
372 deriving (Eq,Ord,Show)
373 instance P.ShowErrorComponent ErrorRead where
374 showErrorComponent = show
375
376 -- ** Type 'Rational'
377 -- | Wrapper to change the 'Read' instance.
378 newtype Rational = Rational Ratio.Rational
379 instance Read Rational where
380 readPrec = do
381 x <- Read.step readPrec
382 Read.expectP (Read.Symbol "/")
383 y <- Read.step readPrec
384 return $ Rational (x % y)