]> Git — Sourcephile - doclang.git/blob - Hdoc/DTC/Read/TCT.hs
Add PairAt, TokenAt and PlainAt.
[doclang.git] / Hdoc / DTC / Read / TCT.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Hdoc.DTC.Read.TCT where
10
11 import Control.Applicative (Applicative(..))
12 import Control.Monad (Monad(..))
13 import Data.Bool
14 import Data.Default.Class (Default(..))
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Foldable (Foldable(..))
18 import Data.Function (($), (.), const, id)
19 import Data.Functor ((<$>), (<$))
20 import Data.Int (Int)
21 import Data.List.NonEmpty (NonEmpty(..))
22 import Data.Maybe (Maybe(..), maybe)
23 import Data.Monoid (Monoid(..), First(..))
24 import Data.Ord (Ord(..))
25 import Data.Proxy (Proxy(..))
26 import Data.Ratio ((%))
27 import Data.Semigroup (Semigroup(..))
28 import Data.Sequence (ViewL(..), (|>))
29 import Data.String (String)
30 import Data.Tuple (fst, snd)
31 import Text.Read (readMaybe, Read(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.List as List
35 import qualified Data.Map.Strict as Map
36 import qualified Data.Ratio as Ratio
37 import qualified Data.Sequence as Seq
38 import qualified Data.Set as Set
39 import qualified Data.Text.Lazy as TL
40 import qualified GHC.Read as Read (expectP)
41 import qualified Text.Megaparsec as P
42 import qualified Text.Megaparsec.Perm as P
43 import qualified Text.Read as Read
44
45 import Hdoc.TCT hiding (Parser, ErrorRead)
46 import Hdoc.Utils (Nat(..), Nat1(..), succNat1)
47 import qualified Hdoc.XML as XML
48 import qualified Hdoc.DTC.Document as DTC
49 import qualified Hdoc.DTC.Sym as DTC
50 import qualified Hdoc.RNC.Sym as RNC
51 import qualified Hdoc.TCT.Cell as TCT
52
53 -- * Type 'State'
54 data State = State
55 { state_posXML :: XML.Pos
56 , state_locTCT :: TCT.Location
57 -- ^ Unfortunately Megaparsec's 'P.statePos'
58 -- is not a good fit to encode 'TCT.Location'.
59 } deriving (Eq,Show)
60 instance Default State where
61 def = State
62 { state_posXML = def
63 , state_locTCT = def
64 }
65
66 -- * Type 'Parser'
67 type Parser = S.StateT State (P.Parsec ErrorRead XML.XMLs)
68
69 instance RNC.Sym_Rule Parser where
70 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
71 rule _n = id
72 instance RNC.Sym_RNC Parser where
73 none = P.label "none" $ P.eof
74 fail = P.label "fail" $ P.failure Nothing mempty
75 any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
76 anyElem p = P.label "anyElem" $ do
77 Cell state_locTCT (n,ts) <- P.token check $ Just expected
78 parserElement n (p n) (Cell state_locTCT ts)
79 where
80 expected = Tree (cell0 $ XML.NodeElem "*") mempty
81 check (Tree cell@(unCell -> XML.NodeElem e) ts) = Right $ (e,ts) <$ cell
82 check t = Left
83 ( Just $ P.Tokens $ pure t
84 , Set.singleton $ P.Tokens $ pure expected )
85 element n p = do
86 ts <- P.token check $ Just expected
87 parserElement n p ts
88 where
89 expected = Tree (cell0 $ XML.NodeElem n) mempty
90 check (Tree cell@(unCell -> XML.NodeElem e) ts) | e == n = Right (ts <$ cell)
91 check t = Left
92 ( Just $ P.Tokens $ pure t
93 , Set.singleton $ P.Tokens $ pure expected )
94 attribute n p = do
95 v <- P.token check $ Just expected
96 parser p $ Seq.singleton $ Tree0 v
97 where
98 expected = Tree0 (cell0 $ XML.NodeAttr n "")
99 check (Tree0 cell@(unCell -> XML.NodeAttr k v)) | k == n =
100 Right $ XML.NodeText v <$ cell
101 check t = Left
102 ( Just $ P.Tokens $ pure t
103 , Set.singleton $ P.Tokens $ pure expected )
104 comment = do
105 s <- P.getInput
106 case Seq.viewl s of
107 Tree0 (unCell -> XML.NodeComment c) :< ts -> do
108 P.setInput ts
109 c <$ setPosOnNextNode
110 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
111 EmptyL -> P.failure Nothing ex
112 where
113 ex = Set.singleton $ P.Tokens $ pure expected
114 expected = Tree0 (cell0 $ XML.NodeComment "")
115 text = do
116 P.token check (Just expected)
117 <* setPosOnNextNode
118 where
119 expected = Tree0 (cell0 $ XML.NodeText "")
120 check (Tree0 (unCell -> XML.NodeText t)) = Right t
121 check t = Left
122 ( Just $ P.Tokens $ pure t
123 , Set.singleton $ P.Tokens $ pure expected )
124 int = RNC.rule "int" $ RNC.text >>= \t ->
125 case readMaybe (TL.unpack t) of
126 Just i -> return i
127 Nothing -> P.fancyFailure $
128 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
129 rational = RNC.rule "rational" $ RNC.text >>= \t ->
130 case readMaybe (TL.unpack t) of
131 Just (Rational i) | 0 <= i -> return i
132 | otherwise -> P.fancyFailure $
133 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
134 Nothing -> P.fancyFailure $
135 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
136 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
137 case readMaybe (TL.unpack t) of
138 Just (Rational i) | 0 <= i -> return i
139 | otherwise -> P.fancyFailure $
140 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
141 Nothing -> P.fancyFailure $
142 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
143 nat = RNC.rule "nat" $ RNC.int >>= \i ->
144 if i >= 0
145 then return $ Nat i
146 else P.fancyFailure $ Set.singleton $
147 P.ErrorCustom $ ErrorRead_Not_Nat i
148 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
149 if i > 0
150 then return $ Nat1 i
151 else P.fancyFailure $ Set.singleton $
152 P.ErrorCustom $ ErrorRead_Not_Nat1 i
153 (<|>) = (P.<|>)
154 many = P.many
155 some = P.some
156 optional = P.optional
157 option = P.option
158 choice = P.choice
159 try = P.try
160
161 parserElement :: XML.Name -> Parser a -> Cell XML.XMLs -> Parser a
162 parserElement n p (Cell state_locTCT ts) = do
163 let mayNameOrFigureName
164 | n == "aside" = Nothing
165 -- NOTE: skip aside.
166 | n == "figure"
167 -- NOTE: special case renaming the current XML.XmlPos
168 -- using the @type attribute to have positions like this:
169 -- section1.Quote1
170 -- section1.Example1
171 -- section1.Quote2
172 -- instead of:
173 -- section1.figure1
174 -- section1.figure2
175 -- section1.figure3
176 , Just ty <- getFirst $ (`foldMap` ts) $ \case
177 Tree0 (unCell -> XML.NodeAttr "type" ty) -> First $ Just ty
178 _ -> First Nothing
179 = Just $ XML.localName $ ty
180 | otherwise = Just n
181 case mayNameOrFigureName of
182 Nothing -> do
183 st <- S.get
184 S.put st{state_locTCT}
185 res <- parser p ts
186 S.put st
187 return res
188 Just nameOrFigureName -> do
189 st@State{state_posXML} <- S.get
190 let incrPrecedingSibling name =
191 maybe (Nat1 1) succNat1 $
192 Map.lookup name $
193 XML.pos_precedingSiblings state_posXML
194 S.put State
195 { state_posXML = state_posXML
196 -- NOTE: in children, push current name incremented on ancestors
197 -- and reset preceding siblings.
198 { XML.pos_precedingSiblings = mempty
199 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
200 , XML.pos_ancestorsWithFigureNames =
201 XML.pos_ancestorsWithFigureNames state_posXML |>
202 ( nameOrFigureName
203 , incrPrecedingSibling nameOrFigureName )
204 }
205 , state_locTCT
206 }
207 res <- parser p ts
208 S.put st
209 { state_posXML = state_posXML
210 -- NOTE: after current, increment current name
211 -- and reset ancestors.
212 { XML.pos_precedingSiblings =
213 (if n == nameOrFigureName then id
214 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
215 Map.insertWith (const succNat1) n (Nat1 1) $
216 XML.pos_precedingSiblings state_posXML
217 }
218 }
219 return res
220
221 type instance RNC.Perm Parser = P.PermParser XML.XMLs Parser
222 instance RNC.Sym_Interleaved Parser where
223 interleaved = P.makePermParser
224 (<$$>) = (P.<$$>)
225 (<||>) = (P.<||>)
226 (<$?>) = (P.<$?>)
227 (<|?>) = (P.<|?>)
228 f <$*> a = f P.<$?> ([],P.some a)
229 f <|*> a = f P.<|?> ([],P.some a)
230 instance DTC.Sym_DTC Parser where
231 positionXML = S.gets state_posXML
232 locationTCT = S.gets state_locTCT
233
234 readDTC ::
235 DTC.Sym_DTC Parser =>
236 XML.XMLs ->
237 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) DTC.Document
238 readDTC = (fst <$>) . runParser def (P.initialPos "":|[]) DTC.document
239
240 -- | @runParser st pos p xs@ runs a 'Parser' @p@ to parse @xs@ entirely,
241 -- using state @st@ from position @pos@.
242 runParser ::
243 DTC.Sym_DTC Parser =>
244 State ->
245 NonEmpty P.SourcePos -> Parser a -> XML.XMLs ->
246 Either (P.ParseError (P.Token XML.XMLs) ErrorRead) (a, State)
247 runParser st pos p inp =
248 let p' = S.runStateT (p <* RNC.none) st in
249 snd $
250 P.runParser' p' P.State
251 { P.stateInput = inp
252 , P.statePos =
253 case Seq.viewl inp of
254 Tree (Cell ss _) _ :< _ ->
255 (<$> ss) $ \Span{span_begin=bp, span_file} ->
256 P.SourcePos span_file
257 (P.mkPos $ pos_line bp)
258 (P.mkPos $ pos_column bp)
259 EmptyL -> pos
260 , P.stateTabWidth = P.pos1
261 , P.stateTokensProcessed = 0
262 }
263
264 -- | @parser p xs@ returns a 'Parser' parsing @xs@ with @p@,
265 -- applying 'setPosOnNextNode' in case of success.
266 parser ::
267 DTC.Sym_DTC Parser =>
268 Parser a -> XML.XMLs -> Parser a
269 parser p xs = do
270 st <- S.get
271 P.State{P.statePos=pos} <- P.getParserState
272 case runParser st pos p xs of
273 Left (P.TrivialError statePos un ex) -> do
274 -- NOTE: just re-raising exception.
275 s <- P.getParserState
276 P.setParserState s{P.statePos}
277 P.failure un ex
278 Left (P.FancyError statePos errs) -> do
279 -- NOTE: just re-raising exception.
280 s <- P.getParserState
281 P.setParserState s{P.statePos}
282 P.fancyFailure errs
283 Right (a, st') -> do
284 S.put st'
285 a <$ setPosOnNextNode
286
287 -- | Adjust the current 'P.SourcePos'
288 -- to be the begining of the following-sibling 'XML' node
289 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
290 -- and thus makes useful error messages.
291 --
292 -- This is needed because the end of a 'Cell'
293 -- is not necessarily the begin of the next 'Cell'.
294 setPosOnNextNode :: Parser ()
295 setPosOnNextNode = do
296 P.State
297 { P.stateInput = inp
298 , P.statePos = pos :| _
299 } <- P.getParserState
300 case Seq.viewl inp of
301 EmptyL -> return ()
302 t :< _ -> P.setPosition $ P.positionAt1 (Proxy::Proxy XML.XMLs) pos t
303
304 instance P.Stream XML.XMLs where
305 type Token XML.XMLs = XML.XML
306 type Tokens XML.XMLs = XML.XMLs
307 take1_ s =
308 case Seq.viewl s of
309 Tree (unCell -> XML.NodeComment{}) _ :< ts -> P.take1_ ts
310 t:<ts -> Just (t,ts)
311 EmptyL -> Nothing
312 positionAt1 _s pos (Tree (Cell (Span{span_begin=Pos l c}:|_) _n) _ts) =
313 pos{ P.sourceLine = P.mkPos l
314 , P.sourceColumn = P.mkPos c }
315 positionAtN s pos ts =
316 case Seq.viewl ts of
317 t :< _ -> P.positionAt1 s pos t
318 EmptyL -> pos
319 advance1 _s _indent pos (Tree (Cell (Span{span_end=Pos l c}:|_) _n) _ts) =
320 -- WARNING: the end of a 'Cell' is not necessarily
321 -- the beginning of the next 'Cell'.
322 pos{ P.sourceLine = P.mkPos l
323 , P.sourceColumn = P.mkPos c }
324 advanceN s = foldl' . P.advance1 s
325 takeN_ n s | n <= 0 = Just (mempty, s)
326 | null s = Nothing
327 | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XML.NodeComment and XML.XmlInclude
328 tokensToChunk _s = Seq.fromList
329 chunkToTokens _s = toList
330 chunkLength _s = Seq.length
331 takeWhile_ = Seq.spanl
332 instance P.ShowToken XML.XML where
333 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
334 where
335 showTree :: XML.XML -> String
336 showTree (Tree a _ts) =
337 showCell a $ \case
338 XML.NodeElem n -> "<"<>show n<>">"
339 XML.NodeAttr n _v -> show n<>"="
340 XML.NodeText _t -> "text"
341 XML.NodeComment _c -> "comment"
342
343 showCell (Cell path@(Span{span_file} :| _) a) f =
344 if null span_file
345 then f a
346 else f a <> foldMap (\p -> "\n in "<>show p) path
347
348 -- ** Type 'ErrorRead'
349 data ErrorRead
350 = ErrorRead_EndOfInput
351 | ErrorRead_Not_Int TL.Text
352 | ErrorRead_Not_Nat Int
353 | ErrorRead_Not_Nat1 Int
354 | ErrorRead_Not_Rational TL.Text
355 | ErrorRead_Not_Positive TL.Text
356 {- ErrorRead_Unexpected P.sourcePos XML -}
357 deriving (Eq,Ord,Show)
358 instance P.ShowErrorComponent ErrorRead where
359 showErrorComponent = show
360
361 -- ** Type 'Rational'
362 -- | Wrapper to change the 'Read' instance.
363 newtype Rational = Rational Ratio.Rational
364 instance Read Rational where
365 readPrec = do
366 x <- Read.step readPrec
367 Read.expectP (Read.Symbol "/")
368 y <- Read.step readPrec
369 return $ Rational (x % y)