]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Automaton/Input.hs
add farthest position heuristic for parsing error messages
[haskell/symantic-parser.git] / src / Symantic / Parser / Automaton / Input.hs
1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE MagicHash #-}
3 {-# LANGUAGE TemplateHaskell #-}
4 {-# LANGUAGE UnboxedTuples #-}
5 module Symantic.Parser.Automaton.Input where
6
7 import Data.Array.Base (UArray(..), listArray)
8 -- import Data.Array.Unboxed (UArray)
9 import Data.Bool
10 import Data.ByteString.Internal (ByteString(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Function (on)
14 import Data.Int (Int)
15 import Data.Kind (Type)
16 import Data.Ord (Ord(..), Ordering)
17 import Data.String (String)
18 import Data.Text ()
19 import Data.Text.Array ({-aBA, empty-})
20 import Data.Text.Internal (Text(..))
21 import Data.Text.Unsafe (iter, Iter(..), iter_, reverseIter_)
22 import Text.Show (Show(..))
23 import GHC.Exts (Int(..), Char(..){-, RuntimeRep(..)-})
24 import GHC.ForeignPtr (ForeignPtr(..), ForeignPtrContents)
25 import GHC.Prim ({-Int#,-} Addr#, nullAddr#, indexWideCharArray#, {-indexWord16Array#,-} readWord8OffAddr#, word2Int#, chr#, touch#, realWorld#, plusAddr#, (+#))
26 import Language.Haskell.TH (CodeQ)
27 import Prelude ((+), (-), error)
28 import qualified Data.ByteString.Lazy.Internal as BSL
29 import qualified Data.List as List
30
31 -- * Class 'Cursorable'
32 class Show cur => Cursorable cur where
33 offset :: cur -> Int
34 compareOffset :: CodeQ (cur -> cur -> Ordering)
35 compareOffset = [|| compare `on` offset ||]
36 lowerOffset :: CodeQ (cur -> cur -> Bool)
37 sameOffset :: CodeQ (cur -> cur -> Bool)
38 shiftRight :: CodeQ (cur -> Int -> cur)
39 instance Cursorable Int where
40 offset = \inp -> inp
41 compareOffset = [|| compare @Int ||]
42 lowerOffset = [|| (<) @Int ||]
43 sameOffset = [|| (==) @Int ||]
44 shiftRight = [|| (+) @Int ||]
45 instance Cursorable Text where
46 offset = \(Text _ i _) -> i
47 lowerOffset = [|| \(Text _ i _) (Text _ j _) -> i < j ||]
48 sameOffset = [|| \(Text _ i _) (Text _ j _) -> i == j ||]
49 shiftRight = [||shiftRightText||]
50
51 shiftRightText :: Text -> Int -> Text
52 shiftRightText (Text arr off unconsumed) i = go i off unconsumed
53 where
54 go 0 off' unconsumed' = Text arr off' unconsumed'
55 go n off' unconsumed'
56 | unconsumed' > 0 = let !d = iter_ (Text arr off' unconsumed') 0
57 in go (n-1) (off'+d) (unconsumed'-d)
58 | otherwise = Text arr off' unconsumed'
59
60 shiftLeftText :: Text -> Int -> Text
61 shiftLeftText (Text arr off unconsumed) i = go i off unconsumed
62 where
63 go 0 off' unconsumed' = Text arr off' unconsumed'
64 go n off' unconsumed'
65 | off' > 0 = let !d = reverseIter_ (Text arr off' unconsumed') 0 in go (n-1) (off'+d) (unconsumed'-d)
66 | otherwise = Text arr off' unconsumed'
67
68 instance Cursorable UnpackedLazyByteString where
69 offset = \(UnpackedLazyByteString i _ _ _ _ _) -> i
70 lowerOffset = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i <= j||]
71 sameOffset = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i == j||]
72 shiftRight = [||shiftRightByteString||]
73
74 shiftRightByteString :: UnpackedLazyByteString -> Int -> UnpackedLazyByteString
75 shiftRightByteString !(UnpackedLazyByteString i addr# final off size cs) j
76 | j < size = UnpackedLazyByteString (i + j) addr# final (off + j) (size - j) cs
77 | otherwise = case cs of
78 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' -> shiftRightByteString (UnpackedLazyByteString (i + size) addr'# final' off' size' cs') (j - size)
79 BSL.Empty -> emptyUnpackedLazyByteString (i + size)
80
81 shiftLeftByteString :: UnpackedLazyByteString -> Int -> UnpackedLazyByteString
82 shiftLeftByteString (UnpackedLazyByteString i addr# final off size cs) j =
83 UnpackedLazyByteString (i - d) addr# final (off - d) (size + d) cs
84 where d = min off j
85
86 offWith :: CodeQ (ts -> OffWith ts)
87 offWith = [|| OffWith 0 ||]
88
89 -- ** Type 'Text16'
90 newtype Text16 = Text16 Text
91 --newtype CacheText = CacheText Text
92 -- ** Type 'CharList'
93 newtype CharList = CharList String
94 -- ** Type 'Stream'
95 data Stream = {-# UNPACK #-} !Char :> Stream
96 nomore :: Stream
97 nomore = '\0' :> nomore
98 {-
99 instance Cursorable (OffWith Stream) where
100 lowerOffset = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
101 sameOffset = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
102 shiftRight = [|| \(OffWith o ts) i -> OffWith (o + i) (dropStream i ts) ||]
103 where
104 dropStream :: Int -> Stream -> Stream
105 dropStream 0 cs = cs
106 dropStream n (_ :> cs) = dropStream (n-1) cs
107 -}
108
109 -- ** Type 'OffWith'
110 data OffWith ts = OffWith {-# UNPACK #-} !Int ts
111 deriving (Show)
112
113 instance Cursorable (OffWith String) where
114 offset = \(OffWith i _) -> i
115 lowerOffset = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
116 sameOffset = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
117 shiftRight = [|| \(OffWith o ts) i -> OffWith (o + i) (List.drop i ts) ||]
118
119 -- ** Type 'OffWithStreamAnd'
120 data OffWithStreamAnd ts = OffWithStreamAnd {-# UNPACK #-} !Int !Stream ts
121 -- ** Type 'UnpackedLazyByteString'
122 data UnpackedLazyByteString = UnpackedLazyByteString
123 {-# UNPACK #-} !Int
124 !Addr#
125 ForeignPtrContents
126 {-# UNPACK #-} !Int
127 {-# UNPACK #-} !Int
128 BSL.ByteString
129 instance Show UnpackedLazyByteString where
130 show (UnpackedLazyByteString _i _addr _p _off _size _cs) = "UnpackedLazyByteString" -- FIXME
131
132 {-# INLINE emptyUnpackedLazyByteString #-}
133 emptyUnpackedLazyByteString :: Int -> UnpackedLazyByteString
134 emptyUnpackedLazyByteString i =
135 UnpackedLazyByteString i nullAddr#
136 (error "nullForeignPtr") 0 0 BSL.Empty
137
138 -- * Class 'Input'
139 class Cursorable (Cursor inp) => Input inp where
140 type Cursor inp :: Type
141 type InputToken inp :: Type
142 cursorOf :: CodeQ inp -> CodeQ
143 (# {-init-} Cursor inp
144 , {-more-} Cursor inp -> Bool
145 , {-next-} Cursor inp -> (# InputToken inp, Cursor inp #)
146 #)
147
148 instance Input String where
149 type Cursor String = Int
150 type InputToken String = Char
151 cursorOf input = cursorOf @(UArray Int Char)
152 [|| listArray (0, List.length $$input-1) $$input ||]
153 instance Input (UArray Int Char) where
154 type Cursor (UArray Int Char) = Int
155 type InputToken (UArray Int Char) = Char
156 cursorOf qinput = [||
157 let UArray _ _ size input# = $$qinput
158 next (I# i#) =
159 (# C# (indexWideCharArray# input# i#)
160 , I# (i# +# 1#)
161 #)
162 in (# 0, (< size), next #)
163 ||]
164 instance Input Text where
165 type Cursor Text = Text
166 type InputToken Text = Char
167 cursorOf inp = [||
168 let _ = "cursorOf" in
169 let next t@(Text arr off unconsumed) =
170 let !(Iter c d) = iter t 0 in
171 (# c, Text arr (off+d) (unconsumed-d) #)
172 more (Text _ _ unconsumed) = unconsumed > 0
173 in (# $$inp, more, next #)
174 ||]
175 instance Input ByteString where
176 type Cursor ByteString = Int
177 type InputToken ByteString = Char
178 cursorOf qinput = [||
179 let PS (ForeignPtr addr# final) off size = $$qinput
180 next i@(I# i#) =
181 case readWord8OffAddr# (addr# `plusAddr#` i#) 0# realWorld# of
182 (# s', x #) -> case touch# final s' of
183 _ -> (# C# (chr# (word2Int# x)), i + 1 #)
184 in (# off, (< size), next #)
185 ||]
186 instance Input BSL.ByteString where
187 type Cursor BSL.ByteString = UnpackedLazyByteString
188 type InputToken BSL.ByteString = Char
189 cursorOf qinput = [||
190 let next (UnpackedLazyByteString i addr# final off@(I# off#) size cs) =
191 case readWord8OffAddr# addr# off# realWorld# of
192 (# s', x #) -> case touch# final s' of
193 _ ->
194 (# C# (chr# (word2Int# x))
195 , if size /= 1 then UnpackedLazyByteString (i+1) addr# final (off+1) (size-1) cs
196 else case cs of
197 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' -> UnpackedLazyByteString (i+1) addr'# final' off' size' cs'
198 BSL.Empty -> emptyUnpackedLazyByteString (i+1)
199 #)
200 more (UnpackedLazyByteString _ _ _ _ 0 _) = False
201 more _ = True
202 init = case $$qinput of
203 BSL.Chunk (PS (ForeignPtr addr# final) off size) cs -> UnpackedLazyByteString 0 addr# final off size cs
204 BSL.Empty -> emptyUnpackedLazyByteString 0
205 in (# init, more, next #)
206 ||]
207 {-
208 instance Input Text16 where
209 type Cursor Text16 = Int
210 cursorOf qinput = [||
211 let Text16 (Text arr off size) = $$qinput
212 arr# = aBA arr
213 next (I# i#) =
214 (# C# (chr# (word2Int# (indexWord16Array# arr# i#)))
215 , I# (i# +# 1#) #)
216 in (# off, (< size), next #)
217 ||]
218 instance Input CharList where
219 type Cursor CharList = OffWith String
220 cursorOf qinput = [||
221 let CharList input = $$qinput
222 next (OffWith i (c:cs)) = (# c, OffWith (i+1) cs #)
223 size = List.length input
224 more (OffWith i _) = i < size
225 --more (OffWith _ []) = False
226 --more _ = True
227 in (# $$offWith input, more, next #)
228 ||]
229 instance Input Stream where
230 type Cursor Stream = OffWith Stream
231 cursorOf qinput = [||
232 let next (OffWith o (c :> cs)) = (# c, OffWith (o + 1) cs #)
233 in (# $$offWith $$qinput, const True, next #)
234 ||]
235 -}
236 {-
237 -- type instance Cursor CacheText = (Text, Stream)
238 -- type instance Cursor BSL.ByteString = OffWith BSL.ByteString
239 -}