]> Git — Sourcephile - doclang.git/blob - Language/DTC/Read/TCT.hs
Fix XML rendition of PairFrenchquote.
[doclang.git] / Language / DTC / Read / TCT.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Language.DTC.Read.TCT where
10
11 import Control.Applicative (Applicative(..))
12 import Control.Monad (Monad(..))
13 import Data.Bool
14 import Data.Default.Class (Default(..))
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Foldable (Foldable(..))
18 import Data.Function (($), (.), const, id)
19 import Data.Functor ((<$>), (<$))
20 import Data.Int (Int)
21 import Data.List.NonEmpty (NonEmpty(..))
22 import Data.Maybe (Maybe(..), maybe)
23 import Data.Monoid (Monoid(..), First(..))
24 import Data.Ord (Ord(..))
25 import Data.Proxy (Proxy(..))
26 import Data.Semigroup (Semigroup(..))
27 import Data.Sequence (ViewL(..), (|>))
28 import Data.String (String)
29 import Data.Tuple (snd)
30 import Prelude (Num(..))
31 import Text.Read (readMaybe)
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.List as List
35 import qualified Data.Map.Strict as Map
36 import qualified Data.Sequence as Seq
37 import qualified Data.Set as Set
38 import qualified Data.Text.Lazy as TL
39 import qualified Text.Megaparsec as P
40 import qualified Text.Megaparsec.Perm as P
41
42 import Language.TCT hiding (Parser, ErrorRead)
43 import Language.XML
44 import qualified Language.DTC.Document as DTC
45 import qualified Language.DTC.Sym as DTC
46 import qualified Language.RNC.Sym as RNC
47
48 -- * Type 'State'
49 type State = DTC.Pos
50
51 -- * Type 'Parser'
52 -- type Parser = P.Parsec ErrorRead XMLs
53 type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
54
55 instance RNC.Sym_Rule Parser where
56 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
57 rule _n = id
58 instance RNC.Sym_RNC Parser where
59 none = P.label "none" $ P.eof
60 fail = P.label "fail" $ P.failure Nothing mempty
61 any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
62 anyElem p = P.label "anyElem" $ do
63 (n,ts) <- P.token check $ Just expected
64 parserXMLs (p n) ts
65 where
66 expected = Tree (cell0 $ XmlElem "*") mempty
67 check (Tree (unCell -> XmlElem e) ts) = Right (e,ts)
68 check t = Left
69 ( Just $ P.Tokens $ pure t
70 , Set.singleton $ P.Tokens $ pure expected )
71 element n p = do
72 ts <- P.token check $ Just expected
73 pos <- S.get
74 let nameOrFigureName
75 | n == "figure"
76 -- NOTE: special case renaming the current DTC.Pos
77 -- using the @type attribute to have positions like this:
78 -- section1.Quote1
79 -- section1.Example1
80 -- section1.Quote2
81 -- instead of:
82 -- section1.figure1
83 -- section1.figure2
84 -- section1.figure3
85 , Just ty <- getFirst $ (`foldMap` ts) $ \case
86 Tree0 (unCell -> XmlAttr "type" ty) -> First $ Just ty
87 _ -> First Nothing
88 = xmlLocalName $ ty
89 | otherwise = n
90 let anc name = maybe 1 (+1) $ Map.lookup name $ DTC.posPrecedingsSiblings pos
91 S.put pos
92 { DTC.posAncestors = DTC.posAncestors pos |> (n,anc n)
93 , DTC.posAncestorsWithFigureNames =
94 DTC.posAncestorsWithFigureNames pos |>
95 (nameOrFigureName,anc nameOrFigureName)
96 , DTC.posPrecedingsSiblings = mempty
97 }
98 res <- parserXMLs p ts
99 S.put pos
100 { DTC.posPrecedingsSiblings=
101 (if n /= nameOrFigureName
102 then Map.insertWith (\_new old -> old + 1) nameOrFigureName 1
103 else id) $
104 Map.insertWith (\_new old -> old + 1) n 1 $
105 DTC.posPrecedingsSiblings pos
106 }
107 return res
108 where
109 expected = Tree (cell0 $ XmlElem n) mempty
110 check (Tree (unCell -> XmlElem e) ts) | e == n = Right ts
111 check t = Left
112 ( Just $ P.Tokens $ pure t
113 , Set.singleton $ P.Tokens $ pure expected )
114 attribute n p = do
115 v <- P.token check $ Just expected
116 parserXMLs p v
117 where
118 expected = Tree0 (cell0 $ XmlAttr n "")
119 check (Tree0 (Cell sp (XmlAttr k v))) | k == n =
120 Right $ Seq.singleton $ Tree0 $ Cell sp $ XmlText v
121 check t = Left
122 ( Just $ P.Tokens $ pure t
123 , Set.singleton $ P.Tokens $ pure expected )
124 comment = do
125 s <- P.getInput
126 case Seq.viewl s of
127 Tree0 (unCell -> XmlComment c) :< ts -> do
128 P.setInput ts
129 c <$ fixPos
130 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
131 EmptyL -> P.failure Nothing ex
132 where
133 ex = Set.singleton $ P.Tokens $ pure expected
134 expected = Tree0 (cell0 $ XmlComment "")
135 text = do
136 P.token check (Just expected)
137 <* fixPos
138 where
139 expected = Tree0 (cell0 $ XmlText "")
140 check (Tree0 (unCell -> XmlText t)) = Right t
141 check t = Left
142 ( Just $ P.Tokens $ pure t
143 , Set.singleton $ P.Tokens $ pure expected )
144 int = RNC.rule "int" $ RNC.text >>= \t ->
145 case readMaybe (TL.unpack t) of
146 Just i -> return i
147 Nothing -> P.fancyFailure $
148 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
149 nat = RNC.rule "nat" $ RNC.int >>= \i ->
150 if i >= 0
151 then return $ Nat i
152 else P.fancyFailure $ Set.singleton $
153 P.ErrorCustom $ ErrorRead_Not_Nat i
154 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
155 if i > 0
156 then return $ Nat1 i
157 else P.fancyFailure $ Set.singleton $
158 P.ErrorCustom $ ErrorRead_Not_Nat1 i
159 (<|>) = (P.<|>)
160 many = P.many
161 some = P.some
162 optional = P.optional
163 option = P.option
164 choice = P.choice
165 try = P.try
166 type instance RNC.Perm Parser = P.PermParser XMLs Parser
167 instance RNC.Sym_Interleaved Parser where
168 interleaved = P.makePermParser
169 (<$$>) = (P.<$$>)
170 (<||>) = (P.<||>)
171 (<$?>) = (P.<$?>)
172 (<|?>) = (P.<|?>)
173 f <$*> a = f P.<$?> ([],P.some a)
174 f <|*> a = f P.<|?> ([],P.some a)
175 instance DTC.Sym_DTC Parser where
176 position = S.get
177
178 readDTC ::
179 DTC.Sym_DTC Parser =>
180 XMLs ->
181 Either (P.ParseError (P.Token XMLs) ErrorRead) DTC.Document
182 readDTC = parseXMLs def (P.initialPos "") DTC.document
183
184 parseXMLs ::
185 DTC.Sym_DTC Parser =>
186 State ->
187 P.SourcePos -> Parser a -> XMLs ->
188 Either (P.ParseError (P.Token XMLs) ErrorRead) a
189 parseXMLs st pos p i =
190 snd $
191 P.runParser' ((`S.evalStateT` st) $ p <* RNC.none)
192 P.State
193 { P.stateInput = i
194 , P.statePos = pure $
195 case Seq.viewl i of
196 Tree (Cell (Span{span_begin=bp}:|_) _) _ :< _ ->
197 P.SourcePos "" -- FIXME: put a FilePath
198 (P.mkPos $ pos_line bp)
199 (P.mkPos $ pos_column bp)
200 EmptyL -> pos
201 , P.stateTabWidth = P.pos1
202 , P.stateTokensProcessed = 0
203 }
204
205 -- | @parserXMLs st pos p xs@ returns a 'Parser' parsing @xs@ with @p@ from state @st@.
206 parserXMLs ::
207 DTC.Sym_DTC Parser =>
208 Parser a -> XMLs -> Parser a
209 parserXMLs p xs = do
210 pos <- P.getPosition
211 st <- S.get
212 case parseXMLs st pos p xs of
213 Left (P.TrivialError (posErr:|_) un ex) -> do
214 P.setPosition posErr
215 P.failure un ex
216 Left (P.FancyError (posErr:|_) errs) -> do
217 P.setPosition posErr
218 P.fancyFailure errs
219 Right a -> a <$ fixPos
220
221 -- | Adjust the current 'P.SourcePos'
222 -- to be the begining of the following-sibling 'XML' node
223 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
224 -- and thus makes useful error messages.
225 --
226 -- This is needed because the end of a 'Cell'
227 -- is not necessarily the begin of the next 'Cell'.
228 fixPos :: Parser ()
229 fixPos = do
230 P.State
231 { P.stateInput = inp
232 , P.statePos = pos :| _
233 } <- P.getParserState
234 case Seq.viewl inp of
235 EmptyL -> return ()
236 t :< _ -> P.setPosition $
237 P.positionAt1 (Proxy::Proxy XMLs) pos t
238
239 instance P.Stream XMLs where
240 type Token XMLs = XML
241 type Tokens XMLs = XMLs
242 take1_ s =
243 case Seq.viewl s of
244 Tree (unCell -> XmlComment{}) _ :< ts -> P.take1_ ts
245 t:<ts -> Just (t,ts)
246 EmptyL -> Nothing
247 positionAt1 _s pos (Tree (Cell (Span{span_begin=Pos l c}:|_) _n) _ts) =
248 P.SourcePos (P.sourceName pos) (P.mkPos l) (P.mkPos c)
249 positionAtN s pos ts =
250 case Seq.viewl ts of
251 t :< _ -> P.positionAt1 s pos t
252 EmptyL -> pos
253 advance1 _s _indent pos (Tree (Cell (Span{span_end=Pos l c}:|_) _n) _ts) =
254 -- WARNING: the end of a 'Cell' is not necessarily
255 -- the beginning of the next 'Cell'.
256 P.SourcePos (P.sourceName pos) (P.mkPos l) (P.mkPos c)
257 advanceN s = foldl' . P.advance1 s
258 takeN_ n s | n <= 0 = Just (mempty, s)
259 | null s = Nothing
260 | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XmlComment and XmlInclude
261 tokensToChunk _s = Seq.fromList
262 chunkToTokens _s = toList
263 chunkLength _s = Seq.length
264 takeWhile_ = Seq.spanl
265 instance P.ShowToken XML where
266 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
267 where
268 showTree :: XML -> String
269 showTree (Tree a _ts) =
270 showCell a $ \case
271 XmlElem n -> "<"<>show n<>">"
272 XmlAttr n _v -> show n<>"="
273 XmlText _t -> "text"
274 XmlComment _c -> "comment"
275
276 showCell (Cell path@(Span{span_file} :| _) a) f =
277 if null span_file
278 then f a
279 else f a <> foldMap (\p -> "\n in "<>show p) path
280
281 -- ** Type 'ErrorRead'
282 data ErrorRead
283 = ErrorRead_EndOfInput
284 | ErrorRead_Not_Int TL.Text
285 | ErrorRead_Not_Nat Int
286 | ErrorRead_Not_Nat1 Int
287 -- | ErrorRead_Unexpected P.sourcePos XML
288 deriving (Eq,Ord,Show)
289 instance P.ShowErrorComponent ErrorRead where
290 showErrorComponent = show