]> Git — Sourcephile - doclang.git/blob - Hdoc/DTC/Read/TCT.hs
XML: use symantic-xml
[doclang.git] / Hdoc / DTC / Read / TCT.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Hdoc.DTC.Read.TCT where
10
11 import Control.Applicative (Applicative(..))
12 import Control.Monad (Monad(..))
13 import Data.Bool
14 import Data.Default.Class (Default(..))
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Foldable (Foldable(..))
18 import Data.Function (($), (.), const, id)
19 import Data.Functor ((<$>), (<$))
20 import Data.Int (Int)
21 import Data.List.NonEmpty (NonEmpty(..))
22 import Data.Maybe (Maybe(..), maybe)
23 import Data.Monoid (Monoid(..), First(..))
24 import Data.Ord (Ord(..))
25 import Data.Proxy (Proxy(..))
26 import Data.Ratio ((%))
27 import Data.Semigroup (Semigroup(..))
28 import Data.Sequence (ViewL(..), (|>))
29 import Data.String (String)
30 import Data.Tuple (fst, snd)
31 import Text.Blaze.DTC (xmlns_dtc)
32 import Text.Read (readMaybe, Read(..))
33 import Text.Show (Show(..))
34 import qualified Control.Monad.Trans.State as S
35 import qualified Data.List as List
36 import qualified Data.Map.Strict as Map
37 import qualified Data.Ratio as Ratio
38 import qualified Data.Sequence as Seq
39 import qualified Data.Set as Set
40 import qualified Data.Text.Lazy as TL
41 import qualified GHC.Read as Read (expectP)
42 import qualified Language.Symantic.RNC as RNC
43 import qualified Language.Symantic.XML as XML
44 import qualified Text.Megaparsec as P
45 import qualified Text.Megaparsec.Perm as P
46 import qualified Text.Read as Read
47
48 import Hdoc.TCT hiding (Parser, ErrorRead)
49 import Hdoc.Utils (Nat(..), Nat1(..), succNat1)
50 import qualified Hdoc.DTC.Document as DTC
51 import qualified Hdoc.DTC.Sym as DTC
52 import qualified Hdoc.RNC as RNC
53 import qualified Hdoc.XML as XML
54 import qualified Hdoc.TCT.Cell as TCT
55
56 -- * Type 'State'
57 data State = State
58 { state_posXML :: XML.Pos
59 , state_locTCT :: TCT.Location
60 -- ^ Unfortunately Megaparsec's 'P.statePos'
61 -- is not a good fit to encode 'TCT.Location'.
62 } deriving (Eq,Show)
63 instance Default State where
64 def = State
65 { state_posXML = def
66 , state_locTCT = def
67 }
68
69 -- * Type 'Parser'
70 type Parser = S.StateT State (P.Parsec ErrorRead XML.XMLs)
71
72 instance RNC.Sym_Rule Parser where
73 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
74 rule _n = id
75 arg _n = pure ()
76 instance RNC.Sym_RNC Parser where
77 namespace _p _n = pure ()
78 fail = P.label "fail" $ P.failure Nothing mempty
79 any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
80 anyElem ns p = P.label "anyElem" $ do
81 Sourced state_locTCT (n, ts) <- P.token check $ Just expected
82 parserElement n (p $ XML.qNameLocal n) (Sourced state_locTCT ts)
83 where
84 expected = XML.Tree (cell0 $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty
85 check (Tree cell@(unSourced -> XML.NodeElem e) ts)
86 | XML.qNameSpace e == ns
87 = Right $ (e,ts) <$ cell
88 check t = Left
89 ( Just $ P.Tokens $ pure t
90 , Set.singleton $ P.Tokens $ pure expected )
91 element n p = do
92 ts <- P.token check $ Just expected
93 parserElement n p ts
94 where
95 expected = Tree (cell0 $ XML.NodeElem n) mempty
96 check (Tree cell@(unSourced -> XML.NodeElem e) ts)
97 | e == n = Right (ts <$ cell)
98 check t = Left
99 ( Just $ P.Tokens $ pure t
100 , Set.singleton $ P.Tokens $ pure expected )
101 {-
102 element n p = do
103 ts <- P.token check $ Just expected
104 parserElement n p ts
105 where
106 expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty
107 check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts)
108 | e == n
109 = Right $ removePI $ removeXMLNS $ removeSpaces ts
110 where
111 removePI xs =
112 (`Seq.filter` xs) $ \case
113 XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False
114 _ -> True
115 removeSpaces xs =
116 if (`all` xs) $ \case
117 XML.Tree (XML.unSourced -> XML.NodeText txt) _ts ->
118 all (\case
119 XML.EscapedPlain t -> TL.all Char.isSpace t
120 _ -> False) txt
121 _ -> True
122 then (`Seq.filter` xs) $ \case
123 XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False
124 _ -> True
125 else xs
126 removeXMLNS xs =
127 let (attrs,rest) = (`Seq.spanl` xs) $ \case
128 XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True
129 _ -> False in
130 let attrs' = (`Seq.filter` attrs) $ \case
131 XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts ->
132 case a of
133 XML.QName "" "xmlns" -> False
134 XML.QName ns _l -> ns /= XML.xmlns_xmlns
135 _ -> True in
136 attrs' <> rest
137 check t = Left
138 ( Just $ P.Tokens $ pure t
139 , Set.singleton $ P.Tokens $ pure expected )
140 -}
141 attribute n p = do
142 v <- P.token check $ Just expected
143 parser p v
144 where
145 expected = Tree0 (cell0 $ XML.NodeAttr n)
146 check (Tree (unSourced -> XML.NodeAttr k) v)
147 | [Tree (Sourced _ (XML.NodeText _v)) _] <- toList v
148 , k == n = Right v
149 check t = Left
150 ( Just $ P.Tokens $ pure t
151 , Set.singleton $ P.Tokens $ pure expected )
152 escapedText = do
153 P.token check (Just expected)
154 <* setPosOnNextNode
155 where
156 expected = Tree0 (cell0 $ XML.NodeText mempty)
157 check (Tree0 (unSourced -> XML.NodeText t)) = Right t
158 check t = Left
159 ( Just $ P.Tokens $ pure t
160 , Set.singleton $ P.Tokens $ pure expected )
161 try = P.try
162 optional = P.optional
163 option = P.option
164 choice = P.choice
165 {-
166 instance Alternative Parser where
167 (<|>) = (P.<|>)
168 many = P.many
169 some = P.some
170 -}
171 instance RNC.Sym_RNC_Extra Parser where
172 none = P.label "none" $ P.eof
173 comment = do
174 s <- P.getInput
175 case Seq.viewl s of
176 Tree0 (unSourced -> XML.NodeComment c) :< ts -> do
177 P.setInput ts
178 c <$ setPosOnNextNode
179 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
180 EmptyL -> P.failure Nothing ex
181 where
182 ex = Set.singleton $ P.Tokens $ pure expected
183 expected = Tree0 (cell0 $ XML.NodeComment "")
184 bool = RNC.rule "bool" $ RNC.text >>= \t ->
185 case t of
186 "true" -> return True
187 "false" -> return False
188 _ -> P.fancyFailure $
189 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
190 int = RNC.rule "int" $ RNC.text >>= \t ->
191 case readMaybe (TL.unpack t) of
192 Just i -> return i
193 Nothing -> P.fancyFailure $
194 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
195 rational = RNC.rule "rational" $ RNC.text >>= \t ->
196 case readMaybe (TL.unpack t) of
197 Just (Rational i) | 0 <= i -> return i
198 | otherwise -> P.fancyFailure $
199 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
200 Nothing -> P.fancyFailure $
201 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
202 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
203 case readMaybe (TL.unpack t) of
204 Just (Rational i) | 0 <= i -> return i
205 | otherwise -> P.fancyFailure $
206 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
207 Nothing -> P.fancyFailure $
208 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
209 nat = RNC.rule "nat" $ RNC.int >>= \i ->
210 if i >= 0
211 then return $ Nat i
212 else P.fancyFailure $ Set.singleton $
213 P.ErrorCustom $ ErrorRead_Not_Nat i
214 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
215 if i > 0
216 then return $ Nat1 i
217 else P.fancyFailure $ Set.singleton $
218 P.ErrorCustom $ ErrorRead_Not_Nat1 i
219
220 parserElement :: XML.QName -> Parser a -> Cell XML.XMLs -> Parser a
221 parserElement n p (Sourced state_locTCT ts) = do
222 let mayNameOrFigureName
223 | n == "aside" = Nothing
224 -- NOTE: skip aside.
225 | n == "figure"
226 -- NOTE: special case renaming the current XML.Pos
227 -- using the @type attribute to have positions like this:
228 -- section1.Quote1
229 -- section1.Example1
230 -- section1.Quote2
231 -- instead of:
232 -- section1.figure1
233 -- section1.figure2
234 -- section1.figure3
235 , Just ty <- getFirst $ (`foldMap` ts) $ \case
236 Tree (unSourced -> XML.NodeAttr "type") xs
237 | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs
238 , Just ty <- XML.ncName $ XML.unescapeText t
239 -> First $ Just ty
240 _ -> First Nothing
241 = Just $ XML.QName xmlns_dtc ty
242 | otherwise = Just n
243 case mayNameOrFigureName of
244 Nothing -> do
245 st <- S.get
246 S.put st{state_locTCT}
247 res <- parser p ts
248 S.put st
249 return res
250 Just nameOrFigureName -> do
251 st@State{state_posXML} <- S.get
252 let incrPrecedingSibling name =
253 maybe (Nat1 1) succNat1 $
254 Map.lookup name $
255 XML.pos_precedingSiblings state_posXML
256 S.put State
257 { state_posXML = state_posXML
258 -- NOTE: in children, push current name incremented on ancestors
259 -- and reset preceding siblings.
260 { XML.pos_precedingSiblings = mempty
261 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
262 , XML.pos_ancestorsWithFigureNames =
263 XML.pos_ancestorsWithFigureNames state_posXML |>
264 ( nameOrFigureName
265 , incrPrecedingSibling nameOrFigureName )
266 }
267 , state_locTCT
268 }
269 res <- parser p ts
270 S.put st
271 { state_posXML = state_posXML
272 -- NOTE: after current, increment current name
273 -- and reset ancestors.
274 { XML.pos_precedingSiblings =
275 (if n == nameOrFigureName then id
276 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
277 Map.insertWith (const succNat1) n (Nat1 1) $
278 XML.pos_precedingSiblings state_posXML
279 }
280 }
281 return res
282
283 type instance RNC.Perm Parser = P.PermParser XML.XMLs Parser
284 instance RNC.Sym_Interleaved Parser where
285 interleaved = P.makePermParser
286 (<$$>) = (P.<$$>)
287 (<||>) = (P.<||>)
288 (<$?>) = (P.<$?>)
289 (<|?>) = (P.<|?>)
290 f <$*> a = f P.<$?> ([],P.some a)
291 f <|*> a = f P.<|?> ([],P.some a)
292 instance DTC.Sym_DTC Parser where
293 positionXML = S.gets state_posXML
294 locationTCT = S.gets state_locTCT
295
296 readDTC ::
297 DTC.Sym_DTC Parser =>
298 XML.XMLs ->
299 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) DTC.Document
300 readDTC = (fst <$>) . runParser def (P.initialPos "":|[]) DTC.document
301
302 -- | @runParser st pos p xs@ runs a 'Parser' @p@ to parse @xs@ entirely,
303 -- using state @st@ from position @pos@.
304 runParser ::
305 DTC.Sym_DTC Parser =>
306 State ->
307 NonEmpty P.SourcePos -> Parser a -> XML.XMLs ->
308 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) (a, State)
309 runParser st pos p inp =
310 let p' = S.runStateT (p <* RNC.none) st in
311 snd $
312 P.runParser' p' P.State
313 { P.stateInput = inp
314 , P.statePos =
315 case Seq.viewl inp of
316 Tree (Sourced ss _) _ :< _ ->
317 (<$> ss) $ \FileRange{fileRange_begin=bp, fileRange_file} ->
318 P.SourcePos fileRange_file
319 (P.mkPos $ filePos_line bp)
320 (P.mkPos $ filePos_column bp)
321 EmptyL -> pos
322 , P.stateTabWidth = P.pos1
323 , P.stateTokensProcessed = 0
324 }
325
326 -- | @parser p xs@ returns a 'Parser' parsing @xs@ with @p@,
327 -- applying 'setPosOnNextNode' in case of success.
328 parser ::
329 DTC.Sym_DTC Parser =>
330 Parser a -> XML.XMLs -> Parser a
331 parser p xs = do
332 st <- S.get
333 P.State{P.statePos=pos} <- P.getParserState
334 case runParser st pos p xs of
335 Left (P.TrivialError statePos un ex) -> do
336 -- NOTE: just re-raising exception.
337 s <- P.getParserState
338 P.setParserState s{P.statePos}
339 P.failure un ex
340 Left (P.FancyError statePos errs) -> do
341 -- NOTE: just re-raising exception.
342 s <- P.getParserState
343 P.setParserState s{P.statePos}
344 P.fancyFailure errs
345 Right (a, st') -> do
346 S.put st'
347 a <$ setPosOnNextNode
348
349 -- | Adjust the current 'P.SourcePos'
350 -- to be the begining of the following-sibling 'XML' node
351 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
352 -- and thus makes useful error messages.
353 --
354 -- This is needed because the end of a 'Sourced'
355 -- is not necessarily the begin of the next 'Sourced'.
356 setPosOnNextNode :: Parser ()
357 setPosOnNextNode = do
358 P.State
359 { P.stateInput = inp
360 , P.statePos = pos :| _
361 } <- P.getParserState
362 case Seq.viewl inp of
363 EmptyL -> return ()
364 t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XML.XMLs) pos t
365
366 {-
367 instance P.Stream XML.XMLs where
368 type Token XML.XMLs = XML.XML
369 type Tokens XML.XMLs = XML.XMLs
370 take1_ s =
371 case Seq.viewl s of
372 Tree (unSourced -> XML.NodeComment{}) _ :< ts -> P.take1_ ts
373 t:<ts -> Just (t,ts)
374 EmptyL -> Nothing
375 positionAt1 _s pos (Tree (Sourced (FileRange{fileRange_begin=FilePos l c}:|_) _n) _ts) =
376 pos{ P.sourceLine = P.mkPos l
377 , P.sourceColumn = P.mkPos c }
378 positionAtN s pos ts =
379 case Seq.viewl ts of
380 t :< _ -> P.positionAt1 s pos t
381 EmptyL -> pos
382 advance1 _s _indent pos (Tree (Sourced (FileRange{fileRange_end=FilePos l c}:|_) _n) _ts) =
383 -- WARNING: the end of a 'Sourced' is not necessarily
384 -- the beginning of the next 'Sourced'.
385 pos{ P.sourceLine = P.mkPos l
386 , P.sourceColumn = P.mkPos c }
387 advanceN s = foldl' . P.advance1 s
388 takeN_ n s | n <= 0 = Just (mempty, s)
389 | null s = Nothing
390 | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XML.NodeComment and XML.XmlInclude
391 tokensToChunk _s = Seq.fromList
392 chunkToTokens _s = toList
393 chunkLength _s = Seq.length
394 takeWhile_ = Seq.spanl
395 -}
396 instance P.ShowToken XML.XML where
397 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
398 where
399 showTree :: XML.XML -> String
400 showTree (Tree a _ts) =
401 showSourced a $ \case
402 XML.NodeAttr n -> "attribute "<>show (remove_XMLNS_DTC n)
403 XML.NodeCDATA _t -> "cdata"
404 XML.NodeComment _c -> "comment"
405 XML.NodeElem n -> "element "<>show (remove_XMLNS_DTC n)
406 XML.NodePI n _t -> "processing-instruction"<>show n
407 XML.NodeText _t -> "text"
408 remove_XMLNS_DTC n
409 | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""}
410 | otherwise = n
411
412 showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f =
413 if null fileRange_file
414 then f a
415 else f a <> foldMap (\p -> "\n in "<>show p) path
416
417 -- ** Type 'ErrorRead'
418 data ErrorRead
419 = ErrorRead_EndOfInput
420 | ErrorRead_Not_Bool TL.Text
421 | ErrorRead_Not_Int TL.Text
422 | ErrorRead_Not_Nat Int
423 | ErrorRead_Not_Nat1 Int
424 | ErrorRead_Not_Rational TL.Text
425 | ErrorRead_Not_Positive TL.Text
426 {- ErrorRead_Unexpected P.sourcePos XML -}
427 deriving (Eq,Ord,Show)
428 instance P.ShowErrorComponent ErrorRead where
429 showErrorComponent = show
430
431 -- ** Type 'Rational'
432 -- | Wrapper to change the 'Read' instance.
433 newtype Rational = Rational Ratio.Rational
434 instance Read Rational where
435 readPrec = do
436 x <- Read.step readPrec
437 Read.expectP (Read.Symbol "/")
438 y <- Read.step readPrec
439 return $ Rational (x % y)