]> Git — Sourcephile - doclang.git/blob - Language/DTC/Read/TCT.hs
Add multi-words indexing.
[doclang.git] / Language / DTC / Read / TCT.hs
1 {-# LANGUAGE BangPatterns #-}
2 {-# LANGUAGE FlexibleContexts #-}
3 {-# LANGUAGE FlexibleInstances #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ScopedTypeVariables #-}
6 {-# LANGUAGE TypeFamilies #-}
7 {-# LANGUAGE ViewPatterns #-}
8 {-# OPTIONS_GHC -fno-warn-orphans #-}
9 -- | Read DTC from TCT.
10 module Language.DTC.Read.TCT where
11
12 import Control.Applicative (Applicative(..))
13 import Control.Monad (Monad(..))
14 import Data.Bool
15 import Data.Either (Either(..))
16 import Data.Eq (Eq(..))
17 import Data.Foldable (null, foldl')
18 import Data.Function (($), (.), const, id)
19 import Data.Functor ((<$>), (<$))
20 import Data.Int (Int)
21 import Data.List.NonEmpty (NonEmpty(..))
22 import Data.Maybe (Maybe(..), fromMaybe, maybe)
23 import Data.Monoid (Monoid(..))
24 import Data.Ord (Ord(..))
25 import Data.Proxy (Proxy(..))
26 import Data.Semigroup (Semigroup(..))
27 import Data.Sequence (ViewL(..))
28 import Data.String (String)
29 import Data.Text (Text)
30 import Data.Tuple (snd)
31 import GHC.Exts (toList)
32 import Prelude (Num(..))
33 import Text.Read (readMaybe)
34 import Text.Show (Show(..))
35 import qualified Control.Monad.Trans.State as S
36 import qualified Data.List as List
37 import qualified Data.Map.Strict as Map
38 import qualified Data.Sequence as Seq
39 import qualified Data.Set as Set
40 import qualified Data.Text as Text
41 import qualified Text.Megaparsec as P
42 import qualified Text.Megaparsec.Perm as P
43
44 import Language.TCT hiding (Parser)
45 import Language.XML
46 import qualified Language.DTC.Document as DTC
47 import qualified Language.DTC.Sym as DTC
48 import qualified Language.RNC.Sym as RNC
49
50 -- * Type 'Parser'
51 -- type Parser = P.Parsec Error XMLs
52 type Parser = S.StateT XmlPos (P.Parsec Error XMLs)
53
54 instance RNC.Sym_Rule Parser where
55 -- rule n p = P.dbg s p {-(p P.<?> s)-} where s = Text.unpack n
56 rule _n = id
57 instance RNC.Sym_RNC Parser where
58 none = P.label "none" $ P.eof
59 any = P.label "any" $ p_satisfyMaybe $ const $ Just ()
60 anyElem p = P.label "anyElem" $ do
61 (n,ts) <- P.token check $ Just expected
62 parserXMLs (p n) ts
63 where
64 expected = TreeN (cell0 "") mempty
65 check (TreeN (unCell -> n) ts) = Right (n,ts)
66 check t = Left
67 ( Just $ P.Tokens $ pure t
68 , Set.singleton $ P.Tokens $ pure expected )
69 position = S.get
70 element n p = do
71 ts <- P.token check $ Just expected
72 xp <- S.get
73 S.put xp
74 { xmlPosAncestors = (n,maybe 1 (+1) $ Map.lookup n $ xmlPosPrecedingsSiblings xp):xmlPosAncestors xp
75 , xmlPosPrecedingsSiblings = mempty
76 }
77 parserXMLs p ts <* S.put xp
78 { xmlPosPrecedingsSiblings =
79 Map.insertWith (\_new old -> old + 1) n 1 $
80 xmlPosPrecedingsSiblings xp
81 }
82 where
83 expected = TreeN (cell0 n) mempty
84 check (TreeN (unCell -> e) ts) | e == n = Right ts
85 check t = Left
86 ( Just $ P.Tokens $ pure t
87 , Set.singleton $ P.Tokens $ pure expected )
88 attribute n p = do
89 v <- P.token check $ Just expected
90 parserXMLs p v
91 where
92 expected = Tree0 (cell0 $ XmlAttr n "")
93 check (TreeN (unCell -> e) ts) | e == n = Right ts
94 check (Tree0 (Cell bp ep (XmlAttr k v))) | k == n =
95 Right $ Seq.singleton $ Tree0 $ Cell bp ep $ XmlText v
96 check t = Left
97 ( Just $ P.Tokens $ pure t
98 , Set.singleton $ P.Tokens $ pure expected )
99 comment = do
100 s <- P.getInput
101 case Seq.viewl s of
102 Tree0 (unCell -> XmlComment c) :< ts -> do
103 P.setInput ts
104 c <$ fixPos
105 t :< _ts -> P.failure (Just $ P.Tokens $ pure t) ex
106 EmptyL -> P.failure Nothing ex
107 where
108 ex = Set.singleton $ P.Tokens $ pure expected
109 expected = Tree0 (cell0 $ XmlComment "")
110 text = do
111 P.token check (Just expected)
112 <* fixPos
113 where
114 expected = Tree0 (cell0 $ XmlText "")
115 check (Tree0 (unCell -> XmlText t)) = Right t
116 check t = Left
117 ( Just $ P.Tokens $ pure t
118 , Set.singleton $ P.Tokens $ pure expected )
119 int = RNC.rule "int" $ RNC.text >>= \t ->
120 case readMaybe (Text.unpack t) of
121 Just i -> return i
122 Nothing -> P.fancyFailure $
123 Set.singleton $ P.ErrorCustom $ Error_Not_Int t
124 nat = RNC.rule "nat" $ RNC.int >>= \i ->
125 if i >= 0
126 then return $ Nat i
127 else P.fancyFailure $ Set.singleton $
128 P.ErrorCustom $ Error_Not_Nat i
129 nat1 = RNC.rule "nat1" $ RNC.int >>= \i ->
130 if i > 0
131 then return $ Nat1 i
132 else P.fancyFailure $ Set.singleton $
133 P.ErrorCustom $ Error_Not_Nat1 i
134 (<|>) = (P.<|>)
135 many = P.many
136 some = P.some
137 optional = P.optional
138 option = P.option
139 choice = P.choice
140 try = P.try
141 type instance RNC.Perm Parser = P.PermParser XMLs Parser
142 instance RNC.Sym_Interleaved Parser where
143 interleaved = P.makePermParser
144 (<$$>) = (P.<$$>)
145 (<||>) = (P.<||>)
146 (<$?>) = (P.<$?>)
147 (<|?>) = (P.<|?>)
148 f <$*> a = f P.<$?> ([],P.some a)
149 f <|*> a = f P.<|?> ([],P.some a)
150 instance DTC.Sym_DTC Parser
151
152 readDTC ::
153 DTC.Sym_DTC Parser =>
154 XMLs ->
155 Either (P.ParseError (P.Token XMLs) Error) DTC.Document
156 readDTC =
157 parseXMLs
158 XmlPos { xmlPosAncestors = []
159 , xmlPosPrecedingsSiblings = mempty
160 }
161 (P.initialPos "")
162 DTC.document
163
164 parseXMLs ::
165 DTC.Sym_DTC Parser =>
166 XmlPos ->
167 P.SourcePos -> Parser a -> XMLs ->
168 Either (P.ParseError (P.Token XMLs) Error) a
169 parseXMLs xp pos p i =
170 snd $
171 P.runParser' ((`S.evalStateT` xp) $ p <* RNC.none)
172 P.State
173 { P.stateInput = i
174 , P.statePos = pure $
175 case Seq.viewl i of
176 Tree0 c :< _ -> sourcePosCell c
177 TreeN c _ :< _ -> sourcePosCell c
178 _ -> pos
179 , P.stateTabWidth = P.pos1
180 , P.stateTokensProcessed = 0
181 }
182
183 -- | @parserXMLs xp pos p xs@ returns a 'Parser' parsing @xs@ with @p@ from position @xp@.
184 parserXMLs ::
185 DTC.Sym_DTC Parser =>
186 Parser a -> XMLs -> Parser a
187 parserXMLs p xs = do
188 pos <- P.getPosition
189 xp <- S.get
190 case parseXMLs xp pos p xs of
191 Left (P.TrivialError (posErr:|_) un ex) -> do
192 P.setPosition posErr
193 P.failure un ex
194 Left (P.FancyError (posErr:|_) errs) -> do
195 P.setPosition posErr
196 P.fancyFailure errs
197 Right a -> a <$ fixPos
198
199 -- | Adjust the current 'P.SourcePos'
200 -- to be the begining of the following-sibling 'XML' node
201 -- so that on error 'P.toHints' keeps expected 'P.Token's in the list,
202 -- and thus makes useful error messages.
203 --
204 -- This is needed because the end of a 'Cell'
205 -- is not necessarily the begin of the next 'Cell'.
206 fixPos :: Parser ()
207 fixPos = do
208 P.State
209 { P.stateInput = inp
210 , P.statePos = pos :| _
211 } <- P.getParserState
212 case Seq.viewl inp of
213 EmptyL -> return ()
214 t :< _ -> P.setPosition $
215 P.positionAt1 (Proxy::Proxy XMLs) pos t
216
217 sourcePosCell :: Cell a -> P.SourcePos
218 sourcePosCell c =
219 P.SourcePos ""
220 (P.mkPos $ lineCell c)
221 (P.mkPos $ columnCell c)
222
223 sourcePos :: Pos -> Maybe P.SourcePos
224 sourcePos (Pos l c) | l>0 && c>0 = Just $ P.SourcePos "" (P.mkPos l) (P.mkPos c)
225 sourcePos _ = Nothing
226
227 instance P.Stream XMLs where
228 type Token XMLs = XML
229 type Tokens XMLs = XMLs
230 take1_ s =
231 case Seq.viewl s of
232 Tree0 (unCell -> XmlComment{}) :< ts -> P.take1_ ts
233 t:<ts -> Just (t,ts)
234 EmptyL -> Nothing
235 positionAt1 _s pos t =
236 fromMaybe pos $ sourcePos $
237 case t of
238 TreeN c _ -> posCell c
239 Tree0 c -> posCell c
240 positionAtN s pos ts =
241 case Seq.viewl ts of
242 t :< _ -> P.positionAt1 s pos t
243 _ -> pos
244 advance1 _s _indent pos t =
245 -- WARNING: the end of a 'Cell' is not necessarily
246 -- the beginning of the next 'Cell'.
247 fromMaybe pos $ sourcePos $
248 case t of
249 TreeN c _ -> posEndCell c
250 Tree0 c -> posEndCell c
251 advanceN s = foldl' . P.advance1 s
252 takeN_ n s
253 | n <= 0 = Just (mempty, s)
254 | null s = Nothing
255 | otherwise = Just (Seq.splitAt n s) -- FIXME: handle XmlComment
256 tokensToChunk _s = Seq.fromList
257 chunkToTokens _s = toList
258 chunkLength _s = Seq.length
259 takeWhile_ = Seq.spanl
260 instance P.ShowToken XML where
261 showTokens toks = List.intercalate ", " $ toList $ showTree <$> toks
262 where
263 showTree :: XML -> String
264 showTree = \case
265 Tree0 c -> showCell c showXmlLeaf
266 TreeN c _ts -> showCell c showXmlName
267
268 showCell (Cell (Pos 0 0) (Pos 0 0) a) f = f a
269 showCell (Cell bp ep a) f = f a<>" at "<>show bp<>"-"<>show ep
270
271 showXmlLeaf = \case
272 XmlAttr n _v -> show n<>"="
273 XmlText _t -> "text"
274 XmlComment _c -> "comment"
275 showXmlName n = "<"<>show n<>">"
276
277 -- ** Type 'Error'
278 data Error
279 = Error_EndOfInput
280 | Error_Not_Int Text
281 | Error_Not_Nat Int
282 | Error_Not_Nat1 Int
283 -- | Error_Unexpected P.sourcePos XML
284 deriving (Eq,Ord,Show)
285 instance P.ShowErrorComponent Error where
286 showErrorComponent = show