]> Git — Sourcephile - doclang.git/blob - src/Textphile/DTC/Read/TCT.hs
stack: bump to version lts-15.4
[doclang.git] / src / Textphile / DTC / Read / TCT.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Textphile.DTC.Read.TCT where
10 import Control.Applicative (Applicative(..), optional)
11 import Control.Monad (Monad(..))
12 import Data.Bool
13 import Data.Default.Class (Default(..))
14 import Data.Either (Either(..))
15 import Data.Eq (Eq(..))
16 import Data.Foldable (Foldable(..), all)
17 import Data.Function (($), (.), const, id)
18 import Data.Functor ((<$>))
19 import Data.Int (Int)
20 import Data.List.NonEmpty (NonEmpty(..))
21 import Data.Maybe (Maybe(..), maybe)
22 import Data.Monoid (Monoid(..), First(..))
23 import Data.Ord (Ord(..))
24 import Data.Ratio ((%))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence (ViewL(..), (|>))
27 import Data.String (String)
28 import Data.Tuple (fst, snd)
29 import Prelude (error)
30 import Text.Blaze.DTC (xmlns_dtc)
31 import Text.Read (readMaybe, Read(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.Char as Char
35 import qualified Data.List as List
36 import qualified Data.List.NonEmpty as NonEmpty
37 import qualified Data.Map.Strict as Map
38 import qualified Data.Ratio as Ratio
39 import qualified Data.Sequence as Seq
40 import qualified Data.Set as Set
41 import qualified Data.Text.Lazy as TL
42 import qualified GHC.Read as Read (expectP)
43 import qualified Symantic.RNC as RNC
44 import qualified Symantic.XML as XML
45 import qualified Text.Megaparsec as P
46 import qualified Text.Read as Read
47
48 import Textphile.TCT hiding (Parser, ErrorRead)
49 import Textphile.XML (XML, XMLs)
50 import Textphile.Utils (Nat(..), Nat1(..), succNat1)
51 import qualified Textphile.DTC.Document as DTC
52 import qualified Textphile.DTC.Sym as DTC
53 import qualified Textphile.RNC as RNC
54 import qualified Textphile.XML as XML
55 import qualified Textphile.TCT.Cell as TCT
56
57 readDTC ::
58 DTC.Sym_DTC Parser =>
59 XMLs ->
60 Either (P.ParseErrorBundle XMLs ErrorRead) DTC.Document
61 readDTC stateInput = (fst <$>) $ snd $
62 P.runParser' (S.runStateT (DTC.document <* P.eof) (def::State)) P.State
63 { P.stateInput
64 , P.stateOffset = 0
65 , P.stateParseErrors = []
66 , P.statePosState =
67 error "[BUG] validateXML: getSourcePos is not helpful here, please use annotated source locations"
68 -- NOTE: reporting the node number is less helpful
69 -- than the source text line and number where the node is;
70 -- P.statePosState is only used by P.getSourcePos.
71 }
72
73 -- * Type 'State'
74 data State = State
75 { state_posXML :: XML.Pos
76 , state_locTCT :: TCT.Location
77 } deriving (Eq,Show)
78 instance Default State where
79 def = State
80 { state_posXML = def
81 , state_locTCT = def
82 }
83
84 -- * Type 'Parser'
85 type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
86
87 instance RNC.Sym_Rule Parser where
88 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
89 rule _n = id
90 arg _n = pure ()
91 instance RNC.Sym_RNC Parser where
92 namespace _p _n = pure ()
93 element n p = do
94 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
95 p_element n p ts
96 where
97 expected = XML.Tree (XML.notSourced $ XML.NodeElem n) mempty
98 check (XML.Tree (XML.Sourced src (XML.NodeElem e)) ts)
99 | e == n
100 = Just $ XML.Sourced src $ removePI $ removeXMLNS $ removeSpaces ts
101 where
102 removePI xs =
103 (`Seq.filter` xs) $ \case
104 XML.Tree (XML.unSourced -> XML.NodePI{}) _ts -> False
105 _ -> True
106 removeSpaces xs =
107 if (`all` xs) $ \case
108 XML.Tree (XML.unSourced -> XML.NodeText (XML.EscapedText et)) _ts ->
109 all (\case
110 XML.EscapedPlain t -> TL.all Char.isSpace t
111 _ -> False) et
112 _ -> True
113 then (`Seq.filter` xs) $ \case
114 XML.Tree (XML.unSourced -> XML.NodeText{}) _ts -> False
115 _ -> True
116 else xs
117 removeXMLNS xs =
118 let (attrs,rest) = (`Seq.spanl` xs) $ \case
119 XML.Tree (XML.unSourced -> XML.NodeAttr{}) _ts -> True
120 _ -> False in
121 let attrs' = (`Seq.filter` attrs) $ \case
122 XML.Tree (XML.unSourced -> XML.NodeAttr a) _ts ->
123 case a of
124 XML.QName "" "xmlns" -> False
125 XML.QName ns _l -> ns /= XML.xmlns_xmlns
126 _ -> True in
127 attrs' <> rest
128 check _t = Nothing
129 attribute n p = do
130 ts <- P.token check (Set.singleton $ P.Tokens $ pure expected)
131 p_XMLs p ts
132 where
133 expected = XML.Tree0 (XML.notSourced $ XML.NodeAttr n)
134 check (XML.Tree (XML.unSourced -> XML.NodeAttr k)
135 v@(toList -> [XML.Tree0 (XML.unSourced -> XML.NodeText _v)])) | k == n =
136 Just v
137 check _t = Nothing
138 any = P.label "any" $
139 P.token (const $ Just ()) Set.empty
140 anyElem ns p = P.label "anyElem" $ do
141 (n,ts) <- P.token check $ Set.singleton $ P.Tokens $ pure expected
142 p_XMLs (p $ XML.qNameLocal n) ts
143 where
144 expected = XML.Tree (XML.notSourced $ XML.NodeElem $ XML.QName ns $ XML.NCName "*") mempty
145 check (XML.Tree (XML.unSourced -> XML.NodeElem e) ts)
146 | XML.qNameSpace e == ns
147 = Just $ (e,ts)
148 check _t = Nothing
149 escapedText = do
150 P.token check $ Set.singleton $ P.Tokens $ pure expected
151 where
152 expected = XML.Tree0 (XML.notSourced $ XML.NodeText $ XML.EscapedText mempty)
153 check (XML.Tree0 (XML.unSourced -> XML.NodeText t)) = Just t
154 check _t = Nothing
155 optional = P.optional
156 option = P.option
157 choice = P.choice
158 try = P.try
159 fail = P.label "fail" $ P.failure Nothing mempty
160 type instance RNC.Permutation Parser = RNC.Perm Parser
161 instance RNC.Sym_Permutation Parser where
162 runPermutation (RNC.Perm value parser) = optional parser >>= f
163 where
164 -- NOTE: copy Control.Applicative.Permutations.runPermutation
165 -- to replace the commented empty below so that P.TrivialError
166 -- has the unexpected token.
167 f Nothing = maybe {-empty-}(P.token (const Nothing) Set.empty) pure value
168 f (Just p) = RNC.runPermutation p
169 toPermutation p = RNC.Perm Nothing $ pure <$> p
170 toPermutationWithDefault v p = RNC.Perm (Just v) $ pure <$> p
171
172 instance P.Stream XMLs where
173 type Token XMLs = XML
174 type Tokens XMLs = XMLs
175 take1_ s =
176 case Seq.viewl s of
177 EmptyL -> Nothing
178 t@(XML.Tree XML.Sourced{XML.unSourced=n} _) :< ts
179 | RNC.isIgnoredNode n -> P.take1_ ts
180 | otherwise -> Just (t, ts)
181 takeN_ n s | n <= 0 = Just (mempty, s)
182 | null s = Nothing
183 | otherwise =
184 let (ns,rs) = Seq.splitAt n s in
185 let (ko,ok) = Seq.partition (RNC.isIgnoredNode . XML.unSourced . XML.unTree) ns in
186 case P.takeN_ (Seq.length ko) rs of
187 Nothing -> Just (ok, rs)
188 Just (ns',rs') -> Just (ok<>ns', rs')
189 tokensToChunk _s = Seq.fromList
190 chunkToTokens _s = toList
191 chunkLength _s = Seq.length
192 takeWhile_ = Seq.spanl
193 -- | NOTE: unimplemented: useless since each 'XML.Node' is annotated with its 'FileSource'.
194 reachOffset = error "[BUG] P.Stream XMLs: reachOffset is not helpful, please use annotated source locations"
195 -- | NOTE: useless since each 'XML.Node' is annotated with its 'FileSource'.
196 reachOffsetNoLine = error "[BUG] P.Stream XMLs: reachOffsetNoLine is not helpful, please use annotated source locations"
197 showTokens _s toks = List.intercalate ", " $ toList $ showTree <$> toks
198 where
199 showTree :: XML -> String
200 showTree (Tree a _ts) =
201 showSourced a $ \case
202 XML.NodeAttr n -> show (remove_XMLNS_DTC n)<>"="
203 XML.NodeCDATA _t -> "cdata"
204 XML.NodeComment _c -> "comment"
205 XML.NodeElem n -> "<"<>show (remove_XMLNS_DTC n)<>">"
206 XML.NodePI n _t -> "processing-instruction"<>show n
207 XML.NodeText _t -> "text"
208 remove_XMLNS_DTC n
209 | XML.qNameSpace n == xmlns_dtc = n{XML.qNameSpace=""}
210 | otherwise = n
211
212 showSourced (Sourced path@(FileRange{fileRange_file} :| _) a) f =
213 if null fileRange_file
214 then f a
215 else f a <> foldMap (\p -> "\n in "<>show p) path
216
217 -- | @p_XMLs p xs@ returns a parser parsing @xs@ entirely with @p@,
218 -- updating 'P.stateOffset' and re-raising any exception.
219 p_XMLs :: Parser a -> XMLs -> Parser a
220 p_XMLs p stateInput = do
221 s <- S.get
222 st <- P.getParserState
223 let (st', res) = P.runParser' (S.runStateT (p <* P.eof) s) P.State
224 { P.stateInput = stateInput
225 , P.stateOffset = P.stateOffset st
226 , P.statePosState = P.PosState
227 { P.pstateInput = stateInput
228 , P.pstateOffset = P.stateOffset st
229 , P.pstateSourcePos = P.pstateSourcePos $ P.statePosState st
230 , P.pstateTabWidth = P.pos1
231 , P.pstateLinePrefix = ""
232 }
233 }
234 P.updateParserState (\ps -> ps{P.stateOffset = P.stateOffset st'})
235 case res of
236 Right (a, s') -> do
237 S.put s'
238 return a
239 Left (P.ParseErrorBundle errs _) ->
240 case NonEmpty.head errs of
241 P.TrivialError _o us es -> P.failure us es
242 {-
243 lift $ P.ParsecT $ \ps _cok cerr _eok _eerr ->
244 cerr (P.TrivialError o us es) ps
245 -}
246 P.FancyError _o es -> P.fancyFailure es
247
248 p_element :: XML.QName -> Parser a -> Cell XMLs -> Parser a
249 p_element n p (Sourced state_locTCT ts) = do
250 let mayNameOrFigureName
251 | n == "aside" = Nothing
252 -- NOTE: skip aside.
253 | n == "figure"
254 -- NOTE: special case renaming the current XML.Pos
255 -- using the @type attribute to have positions like this:
256 -- section1.Quote1
257 -- section1.Example1
258 -- section1.Quote2
259 -- instead of:
260 -- section1.figure1
261 -- section1.figure2
262 -- section1.figure3
263 , Just ty <- getFirst $ (`foldMap` ts) $ \case
264 Tree (unSourced -> XML.NodeAttr "type") xs
265 | [Tree (Sourced _ (XML.NodeText t)) _] <- toList xs
266 , Just ty <- XML.ncName $ XML.unescapeText t
267 -> First $ Just ty
268 _ -> First Nothing
269 = Just $ XML.QName xmlns_dtc ty
270 | otherwise = Just n
271 case mayNameOrFigureName of
272 Nothing -> do
273 st <- S.get
274 S.put st{state_locTCT}
275 res <- p_XMLs p ts
276 S.put st
277 return res
278 Just nameOrFigureName -> do
279 st@State{state_posXML} <- S.get
280 let incrPrecedingSibling name =
281 maybe (Nat1 1) succNat1 $
282 Map.lookup name $
283 XML.pos_precedingSiblings state_posXML
284 S.put State
285 { state_posXML = state_posXML
286 -- NOTE: in children, push current name incremented on ancestors
287 -- and reset preceding siblings.
288 { XML.pos_precedingSiblings = mempty
289 , XML.pos_ancestors = XML.pos_ancestors state_posXML |> (n, incrPrecedingSibling n)
290 , XML.pos_ancestorsWithFigureNames =
291 XML.pos_ancestorsWithFigureNames state_posXML |>
292 ( nameOrFigureName
293 , incrPrecedingSibling nameOrFigureName )
294 }
295 , state_locTCT
296 }
297 res <- p_XMLs p ts
298 S.put st
299 { state_posXML = state_posXML
300 -- NOTE: after current, increment current name
301 -- and reset ancestors.
302 { XML.pos_precedingSiblings =
303 (if n == nameOrFigureName then id
304 else Map.insertWith (const succNat1) nameOrFigureName (Nat1 1)) $
305 Map.insertWith (const succNat1) n (Nat1 1) $
306 XML.pos_precedingSiblings state_posXML
307 }
308 }
309 return res
310
311 instance RNC.Sym_RNC_Extra Parser where
312 none = RNC.rule "none" $ P.eof
313 comment = do
314 s <- P.getInput
315 case Seq.viewl s of
316 Tree0 (unSourced -> XML.NodeComment c) :< ts -> do
317 P.setInput ts
318 return c
319 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
320 EmptyL -> P.failure Nothing ex
321 where
322 ex = Set.singleton $ P.Tokens $ pure expected
323 expected = Tree0 (cell0 $ XML.NodeComment "")
324 bool = RNC.rule "bool" $ RNC.text >>= \t ->
325 case t of
326 "true" -> return True
327 "false" -> return False
328 _ -> P.fancyFailure $
329 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Bool t
330 int = RNC.rule "int" $ RNC.text >>= \t ->
331 case readMaybe (TL.unpack t) of
332 Just i -> return i
333 Nothing -> P.fancyFailure $
334 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
335 rational = RNC.rule "rational" $ RNC.text >>= \t ->
336 case readMaybe (TL.unpack t) of
337 Just (Rational i) | 0 <= i -> return i
338 | otherwise -> P.fancyFailure $
339 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
340 Nothing -> P.fancyFailure $
341 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
342 rationalPositive = RNC.rule "rationalPositive" $ RNC.text >>= \t ->
343 case readMaybe (TL.unpack t) of
344 Just (Rational i) | 0 <= i -> return i
345 | otherwise -> P.fancyFailure $
346 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Positive t
347 Nothing -> P.fancyFailure $
348 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Rational t
349 nat = RNC.rule "nat" $ RNC.int >>= \i ->
350 if i >= 0
351 then return $ Nat i
352 else P.fancyFailure $ Set.singleton $
353 P.ErrorCustom $ ErrorRead_Not_Nat i
354 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
355 if i > 0
356 then return $ Nat1 i
357 else P.fancyFailure $ Set.singleton $
358 P.ErrorCustom $ ErrorRead_Not_Nat1 i
359 instance DTC.Sym_DTC Parser where
360 positionXML = S.gets state_posXML
361 locationTCT = S.gets state_locTCT
362
363
364 -- ** Type 'ErrorRead'
365 data ErrorRead
366 = ErrorRead_EndOfInput
367 | ErrorRead_Not_Bool TL.Text
368 | ErrorRead_Not_Int TL.Text
369 | ErrorRead_Not_Nat Int
370 | ErrorRead_Not_Nat1 Int
371 | ErrorRead_Not_Rational TL.Text
372 | ErrorRead_Not_Positive TL.Text
373 deriving (Eq,Ord,Show)
374 instance P.ShowErrorComponent ErrorRead where
375 showErrorComponent = show
376
377 -- ** Type 'Rational'
378 -- | Wrapper to change the 'Read' instance.
379 newtype Rational = Rational Ratio.Rational
380 instance Read Rational where
381 readPrec = do
382 x <- Read.step readPrec
383 Read.expectP (Read.Symbol "/")
384 y <- Read.step readPrec
385 return $ Rational (x % y)