]> Git — Sourcephile - doclang.git/blob - Hdoc/DTC/Read/TCT.hs
vim: polish code
[doclang.git] / Hdoc / DTC / Read / TCT.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Hdoc.DTC.Read.TCT where
10
11 import Control.Applicative (Applicative(..))
12 import Control.Monad (Monad(..))
13 import Data.Bool
14 import Data.Default.Class (Default(..))
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Foldable (Foldable(..))
18 import Data.Function (($), (.), const, id)
19 import Data.Functor ((<$>), (<$))
20 import Data.Int (Int)
21 import Data.List.NonEmpty (NonEmpty(..))
22 import Data.Maybe (Maybe(..), maybe)
23 import Data.Monoid (Monoid(..), First(..))
24 import Data.Ord (Ord(..))
25 import Data.Proxy (Proxy(..))
26 import Data.Ratio ((%))
27 import Data.Semigroup (Semigroup(..))
28 import Data.Sequence (ViewL(..), (|>))
29 import Data.String (String)
30 import Data.Tuple (fst, snd)
31 import Text.Read (readMaybe, Read(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.List as List
35 import qualified Data.Map.Strict as Map
36 import qualified Data.Ratio as Ratio
37 import qualified Data.Sequence as Seq
38 import qualified Data.Set as Set
39 import qualified Data.Text.Lazy as TL
40 import qualified GHC.Read as Read (expectP)
41 import qualified Text.Megaparsec as P
42 import qualified Text.Megaparsec.Perm as P
43 import qualified Text.Read as Read
44
45 import Hdoc.TCT hiding (Parser, ErrorRead)
46 import Hdoc.Utils (Nat(..), Nat1(..), succNat1)
47 import qualified Hdoc.XML as XML
48 import qualified Hdoc.DTC.Document as DTC
49 import qualified Hdoc.DTC.Sym as DTC
50 import qualified Hdoc.RNC.Sym as RNC
51 import qualified Hdoc.TCT.Cell as TCT
52
53 -- * Type 'State'
54 data State = State
55 { state_posXML :: XML.Pos
56 , state_locTCT :: TCT.Location
57 -- ^ Unfortunately Megaparsec's 'P.statePos'
58 -- is not a good fit to encode 'TCT.Location'.
59 } deriving (Eq,Show)
60 instance Default State where
61 def = State
62 { state_posXML = def
63 , state_locTCT = def
64 }
65
66 -- * Type 'Parser'
67 type Parser = S.StateT State (P.Parsec ErrorRead XML.XMLs)
68
69 instance RNC.Sym_Rule Parser where
70 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
71 rule _n = id
72 instance RNC.Sym_RNC Parser where
73 none = P.label "none" $ P.eof
74 fail = P.label "fail" $ P.failure Nothing mempty
75 any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
76 anyElem p = P.label "anyElem" $ do
77 Cell state_locTCT (n,ts) <- P.token check $ Just expected
78 parserElement n (p n) (Cell state_locTCT ts)
79 where
80 expected = Tree (cell0 $ XML.NodeElem "*") mempty
81 check (Tree cell@(unCell -> XML.NodeElem e) ts) = Right $ (e,ts) <$ cell
82 check t = Left
83 ( Just $ P.Tokens $ pure t
84 , Set.singleton $ P.Tokens $ pure expected )
85 element n p = do
86 ts <- P.token check $ Just expected
87 parserElement n p ts
88 where
89 expected = Tree (cell0 $ XML.NodeElem n) mempty
90 check (Tree cell@(unCell -> XML.NodeElem e) ts) | e == n = Right (ts <$ cell)
91 check t = Left
92 ( Just $ P.Tokens $ pure t
93 , Set.singleton $ P.Tokens $ pure expected )
94 attribute n p = do
95 v <- P.token check $ Just expected
96 parser p $ Seq.singleton $ Tree0 v
97 where
98 expected = Tree0 (cell0 $ XML.NodeAttr n "")
99 check (Tree0 cell@(unCell -> XML.NodeAttr k v)) | k == n =
100 Right $ XML.NodeText v <$ cell
101 check t = Left
102 ( Just $ P.Tokens $ pure t
103 , Set.singleton $ P.Tokens $ pure expected )
104 comment = do
105 s <- P.getInput
106 case Seq.viewl s of
107 Tree0 (unCell -> XML.NodeComment c) :< ts -> do
108 P.setInput ts
109 c <$ setPosOnNextNode
110 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
111 EmptyL -> P.failure Nothing ex
112 where
113 ex = Set.singleton $ P.Tokens $ pure expected
114 expected = Tree0 (cell0 $ XML.NodeComment "")
115 text = do
116 P.token check (Just expected)
117 <* setPosOnNextNode
118 where
119 expected = Tree0 (cell0 $ XML.NodeText "")
120 check (Tree0 (unCell -> XML.NodeText t)) = Right t
121 check t = Left
122 ( Just $ P.Tokens $ pure t
123 , Set.singleton $ P.Tokens $ pure expected )
124 bool = RNC.rule "bool" $ RNC.text >>= \t ->
125 case t of
126 "true" -> return True
127 "false" -> return False
128 _ -> P.fancyFailure $
129 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
130 int = RNC.rule "int" $ RNC.text >>= \t ->
131 case readMaybe (TL.unpack t) of
132 Just i -> return i
133 Nothing -> P.fancyFailure $
134 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
135 rational = RNC.rule "rational" $ RNC.text >>= \t ->
136 case readMaybe (TL.unpack t) of
137 Just (Rational i) | 0 <= i -> return i
138 | otherwise -> P.fancyFailure $
139 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
140 Nothing -> P.fancyFailure $
141 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
142 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
143 case readMaybe (TL.unpack t) of
144 Just (Rational i) | 0 <= i -> return i
145 | otherwise -> P.fancyFailure $
146 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
147 Nothing -> P.fancyFailure $
148 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
149 nat = RNC.rule "nat" $ RNC.int >>= \i ->
150 if i >= 0
151 then return $ Nat i
152 else P.fancyFailure $ Set.singleton $
153 P.ErrorCustom $ ErrorRead_Not_Nat i
154 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
155 if i > 0
156 then return $ Nat1 i
157 else P.fancyFailure $ Set.singleton $
158 P.ErrorCustom $ ErrorRead_Not_Nat1 i
159 (<|>) = (P.<|>)
160 many = P.many
161 some = P.some
162 optional = P.optional
163 option = P.option
164 choice = P.choice
165 try = P.try
166
167 parserElement :: XML.Name -> Parser a -> Cell XML.XMLs -> Parser a
168 parserElement n p (Cell state_locTCT ts) = do
169 let mayNameOrFigureName
170 | n == "aside" = Nothing
171 -- NOTE: skip aside.
172 | n == "figure"
173 -- NOTE: special case renaming the current XML.XmlPos
174 -- using the @type attribute to have positions like this:
175 -- section1.Quote1
176 -- section1.Example1
177 -- section1.Quote2
178 -- instead of:
179 -- section1.figure1
180 -- section1.figure2
181 -- section1.figure3
182 , Just ty <- getFirst $ (`foldMap` ts) $ \case
183 Tree0 (unCell -> XML.NodeAttr "type" ty) -> First $ Just ty
184 _ -> First Nothing
185 = Just $ XML.localName $ ty
186 | otherwise = Just n
187 case mayNameOrFigureName of
188 Nothing -> do
189 st <- S.get
190 S.put st{state_locTCT}
191 res <- parser p ts
192 S.put st
193 return res
194 Just nameOrFigureName -> do
195 st@State{state_posXML} <- S.get
196 let incrPrecedingSibling name =
197 maybe (Nat1 1) succNat1 $
198 Map.lookup name $
199 XML.pos_precedingSiblings state_posXML
200 S.put State
201 { state_posXML = state_posXML
202 -- NOTE: in children, push current name incremented on ancestors
203 -- and reset preceding siblings.
204 { XML.pos_precedingSiblings = mempty
205 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
206 , XML.pos_ancestorsWithFigureNames =
207 XML.pos_ancestorsWithFigureNames state_posXML |>
208 ( nameOrFigureName
209 , incrPrecedingSibling nameOrFigureName )
210 }
211 , state_locTCT
212 }
213 res <- parser p ts
214 S.put st
215 { state_posXML = state_posXML
216 -- NOTE: after current, increment current name
217 -- and reset ancestors.
218 { XML.pos_precedingSiblings =
219 (if n == nameOrFigureName then id
220 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
221 Map.insertWith (const succNat1) n (Nat1 1) $
222 XML.pos_precedingSiblings state_posXML
223 }
224 }
225 return res
226
227 type instance RNC.Perm Parser = P.PermParser XML.XMLs Parser
228 instance RNC.Sym_Interleaved Parser where
229 interleaved = P.makePermParser
230 (<$$>) = (P.<$$>)
231 (<||>) = (P.<||>)
232 (<$?>) = (P.<$?>)
233 (<|?>) = (P.<|?>)
234 f <$*> a = f P.<$?> ([],P.some a)
235 f <|*> a = f P.<|?> ([],P.some a)
236 instance DTC.Sym_DTC Parser where
237 positionXML = S.gets state_posXML
238 locationTCT = S.gets state_locTCT
239
240 readDTC ::
241 DTC.Sym_DTC Parser =>
242 XML.XMLs ->
243 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) DTC.Document
244 readDTC = (fst <$>) . runParser def (P.initialPos "":|[]) DTC.document
245
246 -- | @runParser st pos p xs@ runs a 'Parser' @p@ to parse @xs@ entirely,
247 -- using state @st@ from position @pos@.
248 runParser ::
249 DTC.Sym_DTC Parser =>
250 State ->
251 NonEmpty P.SourcePos -> Parser a -> XML.XMLs ->
252 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) (a, State)
253 runParser st pos p inp =
254 let p' = S.runStateT (p <* RNC.none) st in
255 snd $
256 P.runParser' p' P.State
257 { P.stateInput = inp
258 , P.statePos =
259 case Seq.viewl inp of
260 Tree (Cell ss _) _ :< _ ->
261 (<$> ss) $ \Span{span_begin=bp, span_file} ->
262 P.SourcePos span_file
263 (P.mkPos $ pos_line bp)
264 (P.mkPos $ pos_column bp)
265 EmptyL -> pos
266 , P.stateTabWidth = P.pos1
267 , P.stateTokensProcessed = 0
268 }
269
270 -- | @parser p xs@ returns a 'Parser' parsing @xs@ with @p@,
271 -- applying 'setPosOnNextNode' in case of success.
272 parser ::
273 DTC.Sym_DTC Parser =>
274 Parser a -> XML.XMLs -> Parser a
275 parser p xs = do
276 st <- S.get
277 P.State{P.statePos=pos} <- P.getParserState
278 case runParser st pos p xs of
279 Left (P.TrivialError statePos un ex) -> do
280 -- NOTE: just re-raising exception.
281 s <- P.getParserState
282 P.setParserState s{P.statePos}
283 P.failure un ex
284 Left (P.FancyError statePos errs) -> do
285 -- NOTE: just re-raising exception.
286 s <- P.getParserState
287 P.setParserState s{P.statePos}
288 P.fancyFailure errs
289 Right (a, st') -> do
290 S.put st'
291 a <$ setPosOnNextNode
292
293 -- | Adjust the current 'P.SourcePos'
294 -- to be the begining of the following-sibling 'XML' node
295 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
296 -- and thus makes useful error messages.
297 --
298 -- This is needed because the end of a 'Cell'
299 -- is not necessarily the begin of the next 'Cell'.
300 setPosOnNextNode :: Parser ()
301 setPosOnNextNode = do
302 P.State
303 { P.stateInput = inp
304 , P.statePos = pos :| _
305 } <- P.getParserState
306 case Seq.viewl inp of
307 EmptyL -> return ()
308 t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XML.XMLs) pos t
309
310 instance P.Stream XML.XMLs where
311 type Token XML.XMLs = XML.XML
312 type Tokens XML.XMLs = XML.XMLs
313 take1_ s =
314 case Seq.viewl s of
315 Tree (unCell -> XML.NodeComment{}) _ :< ts -> P.take1_ ts
316 t:<ts -> Just (t,ts)
317 EmptyL -> Nothing
318 positionAt1 _s pos (Tree (Cell (Span{span_begin=Pos l c}:|_) _n) _ts) =
319 pos{ P.sourceLine = P.mkPos l
320 , P.sourceColumn = P.mkPos c }
321 positionAtN s pos ts =
322 case Seq.viewl ts of
323 t :< _ -> P.positionAt1 s pos t
324 EmptyL -> pos
325 advance1 _s _indent pos (Tree (Cell (Span{span_end=Pos l c}:|_) _n) _ts) =
326 -- WARNING: the end of a 'Cell' is not necessarily
327 -- the beginning of the next 'Cell'.
328 pos{ P.sourceLine = P.mkPos l
329 , P.sourceColumn = P.mkPos c }
330 advanceN s = foldl' . P.advance1 s
331 takeN_ n s | n <= 0 = Just (mempty, s)
332 | null s = Nothing
333 | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XML.NodeComment and XML.XmlInclude
334 tokensToChunk _s = Seq.fromList
335 chunkToTokens _s = toList
336 chunkLength _s = Seq.length
337 takeWhile_ = Seq.spanl
338 instance P.ShowToken XML.XML where
339 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
340 where
341 showTree :: XML.XML -> String
342 showTree (Tree a _ts) =
343 showCell a $ \case
344 XML.NodeElem n -> "<"<>show n<>">"
345 XML.NodeAttr n _v -> show n<>"="
346 XML.NodeText _t -> "text"
347 XML.NodeComment _c -> "comment"
348
349 showCell (Cell path@(Span{span_file} :| _) a) f =
350 if null span_file
351 then f a
352 else f a <> foldMap (\p -> "\n in "<>show p) path
353
354 -- ** Type 'ErrorRead'
355 data ErrorRead
356 = ErrorRead_EndOfInput
357 | ErrorRead_Not_Bool TL.Text
358 | ErrorRead_Not_Int TL.Text
359 | ErrorRead_Not_Nat Int
360 | ErrorRead_Not_Nat1 Int
361 | ErrorRead_Not_Rational TL.Text
362 | ErrorRead_Not_Positive TL.Text
363 {- ErrorRead_Unexpected P.sourcePos XML -}
364 deriving (Eq,Ord,Show)
365 instance P.ShowErrorComponent ErrorRead where
366 showErrorComponent = show
367
368 -- ** Type 'Rational'
369 -- | Wrapper to change the 'Read' instance.
370 newtype Rational = Rational Ratio.Rational
371 instance Read Rational where
372 readPrec = do
373 x <- Read.step readPrec
374 Read.expectP (Read.Symbol "/")
375 y <- Read.step readPrec
376 return $ Rational (x % y)