]> Git — Sourcephile - doclang.git/blob - Language/DTC/Read/TCT.hs
Massage Data.TreeSeq.
[doclang.git] / Language / DTC / Read / TCT.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedStrings #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 {-# LANGUAGE ViewPatterns #-}
7 {-# OPTIONS_GHC -fno-warn-orphans #-}
8 -- | Read DTC from TCT.
9 module Language.DTC.Read.TCT where
10
11 import Control.Applicative (Applicative(..))
12 import Control.Monad (Monad(..))
13 import Data.Bool
14 import Data.Default.Class (Default(..))
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Foldable (Foldable(..))
18 import Data.Function (($), (.), const, id)
19 import Data.Functor ((<$>), (<$))
20 import Data.Int (Int)
21 import Data.List.NonEmpty (NonEmpty(..))
22 import Data.Maybe (Maybe(..), fromMaybe, maybe)
23 import Data.Monoid (Monoid(..), First(..))
24 import Data.Ord (Ord(..))
25 import Data.Proxy (Proxy(..))
26 import Data.Semigroup (Semigroup(..))
27 import Data.Sequence (ViewL(..), (|>))
28 import Data.String (String)
29 import Data.Tuple (snd)
30 import Prelude (Num(..))
31 import Text.Read (readMaybe)
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.List as List
35 import qualified Data.Map.Strict as Map
36 import qualified Data.Sequence as Seq
37 import qualified Data.Set as Set
38 import qualified Data.Text.Lazy as TL
39 import qualified Text.Megaparsec as P
40 import qualified Text.Megaparsec.Perm as P
41
42 import Language.TCT hiding (Parser, ErrorRead)
43 import Language.XML
44 import qualified Language.DTC.Document as DTC
45 import qualified Language.DTC.Sym as DTC
46 import qualified Language.RNC.Sym as RNC
47
48 -- * Type 'State'
49 type State = DTC.Pos
50
51 -- * Type 'Parser'
52 -- type Parser = P.Parsec ErrorRead XMLs
53 type Parser = S.StateT State (P.Parsec ErrorRead XMLs)
54
55 instance RNC.Sym_Rule Parser where
56 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
57 rule _n = id
58 instance RNC.Sym_RNC Parser where
59 none = P.label "none" $ P.eof
60 any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
61 anyElem p = P.label "anyElem" $ do
62 (n,ts) <- P.token check $ Just expected
63 parserXMLs (p n) ts
64 where
65 expected = Tree (cell0 $ XmlElem "") mempty
66 check (Tree (unCell -> XmlElem n) ts) = Right (n,ts)
67 check t = Left
68 ( Just $ P.Tokens $ pure t
69 , Set.singleton $ P.Tokens $ pure expected )
70 element n p = do
71 ts <- P.token check $ Just expected
72 xp <- S.get
73 let nameOrFigureName
74 | n == "figure"
75 -- NOTE: special case renaming the current DTC.Pos
76 -- using the @type attribute to have positions like this:
77 -- section1.Quote1
78 -- section1.Example1
79 -- section1.Quote2
80 -- instead of:
81 -- section1.figure1
82 -- section1.figure2
83 -- section1.figure3
84 , Just ty <- getFirst $ (`foldMap` ts) $ \case
85 Tree0 (unCell -> XmlAttr "type" ty) -> First $ Just ty
86 _ -> First Nothing
87 = xmlLocalName $ ty
88 | otherwise = n
89 let anc name = maybe 1 (+1) $ Map.lookup name $ DTC.posPrecedingsSiblings xp
90 S.put xp
91 { DTC.posAncestors = DTC.posAncestors xp |> (n,anc n)
92 , DTC.posAncestorsWithFigureNames =
93 DTC.posAncestorsWithFigureNames xp |>
94 (nameOrFigureName,anc nameOrFigureName)
95 , DTC.posPrecedingsSiblings = mempty
96 }
97 parserXMLs p ts <* S.put xp
98 { DTC.posPrecedingsSiblings=
99 (if n /= nameOrFigureName
100 then Map.insertWith (\_new old -> old + 1) nameOrFigureName 1
101 else id) $
102 Map.insertWith (\_new old -> old + 1) n 1 $
103 DTC.posPrecedingsSiblings xp
104 }
105 where
106 expected = Tree (cell0 $ XmlElem n) mempty
107 check (Tree (unCell -> XmlElem e) ts) | e == n = Right ts
108 check t = Left
109 ( Just $ P.Tokens $ pure t
110 , Set.singleton $ P.Tokens $ pure expected )
111 attribute n p = do
112 v <- P.token check $ Just expected
113 parserXMLs p v
114 where
115 expected = Tree0 (cell0 $ XmlAttr n "")
116 check (Tree (unCell -> XmlElem e) ts) | e == n = Right ts
117 check (Tree0 (Cell bp ep (XmlAttr k v))) | k == n =
118 Right $ Seq.singleton $ Tree0 $ Cell bp ep $ XmlText v
119 check t = Left
120 ( Just $ P.Tokens $ pure t
121 , Set.singleton $ P.Tokens $ pure expected )
122 comment = do
123 s <- P.getInput
124 case Seq.viewl s of
125 Tree0 (unCell -> XmlComment c) :< ts -> do
126 P.setInput ts
127 c <$ fixPos
128 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
129 EmptyL -> P.failure Nothing ex
130 where
131 ex = Set.singleton $ P.Tokens $ pure expected
132 expected = Tree0 (cell0 $ XmlComment "")
133 text = do
134 P.token check (Just expected)
135 <* fixPos
136 where
137 expected = Tree0 (cell0 $ XmlText "")
138 check (Tree0 (unCell -> XmlText t)) = Right t
139 check t = Left
140 ( Just $ P.Tokens $ pure t
141 , Set.singleton $ P.Tokens $ pure expected )
142 int = RNC.rule "int" $ RNC.text >>= \t ->
143 case readMaybe (TL.unpack t) of
144 Just i -> return i
145 Nothing -> P.fancyFailure $
146 Set.singleton $ P.ErrorCustom $ ErrorRead_Not_Int t
147 nat = RNC.rule "nat" $ RNC.int >>= \i ->
148 if i >= 0
149 then return $ Nat i
150 else P.fancyFailure $ Set.singleton $
151 P.ErrorCustom $ ErrorRead_Not_Nat i
152 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
153 if i > 0
154 then return $ Nat1 i
155 else P.fancyFailure $ Set.singleton $
156 P.ErrorCustom $ ErrorRead_Not_Nat1 i
157 (<|>) = (P.<|>)
158 many = P.many
159 some = P.some
160 optional = P.optional
161 option = P.option
162 choice = P.choice
163 try = P.try
164 type instance RNC.Perm Parser = P.PermParser XMLs Parser
165 instance RNC.Sym_Interleaved Parser where
166 interleaved = P.makePermParser
167 (<$$>) = (P.<$$>)
168 (<||>) = (P.<||>)
169 (<$?>) = (P.<$?>)
170 (<|?>) = (P.<|?>)
171 f <$*> a = f P.<$?> ([],P.some a)
172 f <|*> a = f P.<|?> ([],P.some a)
173 instance DTC.Sym_DTC Parser where
174 position = S.get
175
176 readDTC ::
177 DTC.Sym_DTC Parser =>
178 XMLs ->
179 Either (P.ParseError (P.Token XMLs) ErrorRead) DTC.Document
180 readDTC = parseXMLs def (P.initialPos "") DTC.document
181
182 parseXMLs ::
183 DTC.Sym_DTC Parser =>
184 State ->
185 P.SourcePos -> Parser a -> XMLs ->
186 Either (P.ParseError (P.Token XMLs) ErrorRead) a
187 parseXMLs st pos p i =
188 snd $
189 P.runParser' ((`S.evalStateT` st) $ p <* RNC.none)
190 P.State
191 { P.stateInput = i
192 , P.statePos = pure $
193 case Seq.viewl i of
194 Tree c _ :< _ -> sourcePosCell c
195 _ -> pos
196 , P.stateTabWidth = P.pos1
197 , P.stateTokensProcessed = 0
198 }
199
200 -- | @parserXMLs st pos p xs@ returns a 'Parser' parsing @xs@ with @p@ from state @st@.
201 parserXMLs ::
202 DTC.Sym_DTC Parser =>
203 Parser a -> XMLs -> Parser a
204 parserXMLs p xs = do
205 pos <- P.getPosition
206 st <- S.get
207 case parseXMLs st pos p xs of
208 Left (P.TrivialError (posErr:|_) un ex) -> do
209 P.setPosition posErr
210 P.failure un ex
211 Left (P.FancyError (posErr:|_) errs) -> do
212 P.setPosition posErr
213 P.fancyFailure errs
214 Right a -> a <$ fixPos
215
216 -- | Adjust the current 'P.SourcePos'
217 -- to be the begining of the following-sibling 'XML' node
218 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
219 -- and thus makes useful error messages.
220 --
221 -- This is needed because the end of a 'Cell'
222 -- is not necessarily the begin of the next 'Cell'.
223 fixPos :: Parser ()
224 fixPos = do
225 P.State
226 { P.stateInput = inp
227 , P.statePos = pos :| _
228 } <- P.getParserState
229 case Seq.viewl inp of
230 EmptyL -> return ()
231 t :< _ -> P.setPosition $
232 P.positionAt1 (Proxy::Proxy XMLs) pos t
233
234 sourcePosCell :: Cell a -> P.SourcePos
235 sourcePosCell (cell_begin -> bp) =
236 P.SourcePos ""
237 (P.mkPos $ pos_line bp)
238 (P.mkPos $ pos_column bp)
239
240 sourcePos :: Pos -> Maybe P.SourcePos
241 sourcePos (Pos l c) | l>0 && c>0 = Just $ P.SourcePos "" (P.mkPos l) (P.mkPos c)
242 sourcePos _ = Nothing
243
244 instance P.Stream XMLs where
245 type Token XMLs = XML
246 type Tokens XMLs = XMLs
247 take1_ s =
248 case Seq.viewl s of
249 Tree0 (unCell -> XmlComment{}) :< ts -> P.take1_ ts
250 t:<ts -> Just (t,ts)
251 EmptyL -> Nothing
252 positionAt1 _s pos =
253 fromMaybe pos . sourcePos .
254 cell_begin . unTree
255 positionAtN s pos ts =
256 case Seq.viewl ts of
257 t :< _ -> P.positionAt1 s pos t
258 _ -> pos
259 advance1 _s _indent pos =
260 -- WARNING: the end of a 'Cell' is not necessarily
261 -- the beginning of the next 'Cell'.
262 fromMaybe pos . sourcePos .
263 cell_end . unTree
264 advanceN s = foldl' . P.advance1 s
265 takeN_ n s
266 | n <= 0 = Just (mempty, s)
267 | null s = Nothing
268 | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XmlComment
269 tokensToChunk _s = Seq.fromList
270 chunkToTokens _s = toList
271 chunkLength _s = Seq.length
272 takeWhile_ = Seq.spanl
273 instance P.ShowToken XML where
274 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
275 where
276 showTree :: XML -> String
277 showTree (Tree a _ts) =
278 showCell a $ \case
279 XmlElem n -> "<"<>show n<>">"
280 XmlAttr n _v -> show n<>"="
281 XmlText _t -> "text"
282 XmlComment _c -> "comment"
283
284 showCell (Cell (Pos 0 0) (Pos 0 0) a) f = f a
285 showCell (Cell bp ep a) f = f a<>" at "<>show bp<>"-"<>show ep
286
287 -- ** Type 'ErrorRead'
288 data ErrorRead
289 = ErrorRead_EndOfInput
290 | ErrorRead_Not_Int TL.Text
291 | ErrorRead_Not_Nat Int
292 | ErrorRead_Not_Nat1 Int
293 -- | ErrorRead_Unexpected P.sourcePos XML
294 deriving (Eq,Ord,Show)
295 instance P.ShowErrorComponent ErrorRead where
296 showErrorComponent = show