1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE MagicHash #-}
3 {-# LANGUAGE TemplateHaskell #-}
4 {-# LANGUAGE UnboxedTuples #-}
5 {-# LANGUAGE PolyKinds #-}
6 module Symantic.Parser.Machine.Input where
8 import Data.Array.Base (UArray(..), listArray)
9 -- import Data.Array.Unboxed (UArray)
11 import Data.ByteString.Internal (ByteString(..))
12 import Data.Char (Char)
13 import Data.Word (Word8)
14 import Data.Eq (Eq(..))
15 import Data.Function (on)
17 import Data.Kind (Type)
18 import Data.Ord (Ord(..), Ordering)
19 import Data.String (String)
21 import Data.Text.Array ({-aBA, empty-})
22 import Data.Text.Internal (Text(..))
23 --import Data.Text.Unsafe (iter, Iter(..), iter_, reverseIter_)
24 import qualified Data.Text.Unsafe as Text.Unsafe
25 import Text.Show (Show(..))
26 import GHC.Exts (Int(..), Char(..) {-, RuntimeRep(..)-}, TYPE)
27 import GHC.Word (Word8(..))
28 import GHC.ForeignPtr (ForeignPtr(..), ForeignPtrContents)
29 import GHC.Prim ({-Int#,-} Addr#, nullAddr#, indexWideCharArray#, {-indexWord16Array#,-} readWord8OffAddr#, word2Int#, chr#, touch#, realWorld#, plusAddr#, (+#))
30 import Language.Haskell.TH (CodeQ)
31 import Prelude ((+), (-), error)
32 import qualified Data.ByteString.Lazy.Internal as BSL
33 import qualified Data.List as List
34 import qualified Data.Text as Text
35 import qualified Symantic.Parser.Machine.Input.Text.Buffer as Text.Buffer
37 -- * Class 'Inputable'
38 class Positionable (InputPosition inp) => Inputable inp where
39 type InputBuffer inp :: Type
40 type InputPosition inp :: Type
41 type InputToken inp :: Type
42 cursorOf :: CodeQ inp -> CodeQ (Cursor inp)
43 nullInput :: inp -> Bool
46 type Cursor (inp :: TYPE r) = -- FIXME: what is that TYPE doing here?
47 (# {-initBuf-} InputBuffer inp
48 , {-initPos-} InputPosition inp
49 , {-more-} InputBuffer inp -> InputPosition inp -> Bool
50 , {-next-} InputBuffer inp -> InputPosition inp -> (# InputToken inp, InputPosition inp #)
51 , {-append-} InputBuffer inp -> inp -> InputBuffer inp #)
52 instance Inputable Text where
53 type InputBuffer Text = Text.Buffer.Buffer
54 type InputPosition Text = Int
55 type InputToken Text = Char
59 let !(Text.Unsafe.Iter c d) = Text.Buffer.iter buf pos in
61 more buf pos = pos < Text.Buffer.length buf
62 append = Text.Buffer.pappend
63 in (# Text.Buffer.buffer $$inp, 0, more, next, append #)
66 instance Inputable Text where
67 type InputBuffer Text = Text
68 type InputPosition Text = Text
69 type InputToken Text = Char
72 let next _buf t@(Text arr off unconsumed) =
73 let !(Text.Unsafe.Iter c d) = Text.Unsafe.iter t 0 in
74 (# c, Text arr (off+d) (unconsumed-d) #)
75 more _buf (Text _ _ unconsumed) = unconsumed > 0
76 in (# $$inp, $$inp, more, next, Text.append #)
80 -- ** Class 'Positionable'
81 class Show pos => Positionable pos where
82 position :: pos -> Int
83 comparePosition :: CodeQ (pos -> pos -> Ordering)
84 comparePosition = [|| compare `on` position ||]
85 lowerPosition :: CodeQ (pos -> pos -> Bool)
86 samePosition :: CodeQ (pos -> pos -> Bool)
87 shiftRight :: CodeQ (Int -> pos -> pos)
88 instance Positionable Int where
89 position = \inp -> inp
90 comparePosition = [|| compare @Int ||]
91 lowerPosition = [|| (<) @Int ||]
92 samePosition = [|| (==) @Int ||]
93 shiftRight = [|| (+) @Int ||]
94 instance Positionable Text where
95 position = \(Text _ i _) -> i
96 comparePosition = [|| \(Text _ i _) (Text _ j _) -> i `compare` j ||]
97 lowerPosition = [|| \(Text _ i _) (Text _ j _) -> i < j ||]
98 samePosition = [|| \(Text _ i _) (Text _ j _) -> i == j ||]
99 shiftRight = [||shiftRightText||]
101 shiftRightText :: Int -> Text -> Text
102 shiftRightText i (Text arr off unconsumed) = go i off unconsumed
104 go 0 off' unconsumed' = Text arr off' unconsumed'
105 go n off' unconsumed'
107 , !d <- Text.Unsafe.iter_ (Text arr off' unconsumed') 0
108 = go (n-1) (off'+d) (unconsumed'-d)
109 | otherwise = Text arr off' unconsumed'
112 shiftLeftText :: Int -> Text -> Text
113 shiftLeftText i (Text arr off unconsumed) = go i off unconsumed
115 go 0 off' unconsumed' = Text arr off' unconsumed'
116 go n off' unconsumed'
118 , !d <- Text.Unsafe.reverseIter_ (Text arr off' unconsumed') 0
119 = go (n-1) (off'+d) (unconsumed'-d)
120 | otherwise = Text arr off' unconsumed'
122 instance Positionable UnpackedLazyByteString where
123 position = \(UnpackedLazyByteString i _ _ _ _ _) -> i
124 lowerPosition = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i <= j||]
125 samePosition = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i == j||]
126 shiftRight = [||shiftRightByteString||]
128 shiftRightByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
129 shiftRightByteString j (UnpackedLazyByteString i addr# final off size cs)
130 | j < size = UnpackedLazyByteString (i + j) addr# final (off + j) (size - j) cs
131 | otherwise = case cs of
132 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
133 shiftRightByteString (j - size)
134 (UnpackedLazyByteString (i + size) addr'# final' off' size' cs')
135 BSL.Empty -> emptyUnpackedLazyByteString (i + size)
137 shiftLeftByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
138 shiftLeftByteString j (UnpackedLazyByteString i addr# final off size cs) =
139 UnpackedLazyByteString (i - d) addr# final (off - d) (size + d) cs
143 newtype Text16 = Text16 Text
144 --newtype CacheText = CacheText Text
145 -- ** Type 'CharList'
146 newtype CharList = CharList String
148 data Stream = {-# UNPACK #-} !Char :> Stream
150 nomore = '\0' :> nomore
152 instance Positionable (OffWith Stream) where
153 lowerPosition = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
154 samePosition = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
155 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (dropStream i ts) ||]
157 dropStream :: Int -> Stream -> Stream
159 dropStream n (_ :> cs) = dropStream (n-1) cs
162 data OffWith ts = OffWith {-# UNPACK #-} !Int ts
165 instance Positionable (OffWith String) where
166 position = \(OffWith i _) -> i
167 lowerPosition = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
168 samePosition = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
169 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (List.drop i ts) ||]
171 -- ** Type 'OffWithStreamAnd'
172 data OffWithStreamAnd ts = OffWithStreamAnd {-# UNPACK #-} !Int !Stream ts
173 -- ** Type 'UnpackedLazyByteString'
174 data UnpackedLazyByteString = UnpackedLazyByteString
181 instance Show UnpackedLazyByteString where
182 show (UnpackedLazyByteString _i _addr _p _off _size _cs) = "UnpackedLazyByteString" -- FIXME
184 {-# INLINE emptyUnpackedLazyByteString #-}
185 emptyUnpackedLazyByteString :: Int -> UnpackedLazyByteString
186 emptyUnpackedLazyByteString i =
187 UnpackedLazyByteString i nullAddr#
188 (error "nullForeignPtr") 0 0 BSL.Empty
192 instance Inputable String where
193 type Cursor String = Int
194 type InputToken String = Char
195 cursorOf input = cursorOf @(UArray Int Char)
196 [|| listArray (0, List.length $$input-1) $$input ||]
197 instance Inputable (UArray Int Char) where
198 type Cursor (UArray Int Char) = Int
199 type InputToken (UArray Int Char) = Char
200 cursorOf qinput = [||
201 -- Pattern bindings containing unlifted types should use an outermost bang pattern.
202 let !(UArray _ _ size input#) = $$qinput
204 (# C# (indexWideCharArray# input# i#)
206 in (# 0, (< size), next #)
208 instance Inputable Text where
209 type Cursor Text = Text
210 type InputToken Text = Char
212 let _ = "cursorOf" in
213 let next t@(Text arr off unconsumed) =
214 let !(Text.Unsafe.Iter c d) = Text.Unsafe.iter t 0 in
215 (# c, Text arr (off+d) (unconsumed-d) #)
216 more (Text _ _ unconsumed) = unconsumed > 0
217 in (# $$inp, more, next #)
221 instance Inputable ByteString where
222 type Cursor ByteString = Int
223 type InputToken ByteString = Word8
224 cursorOf qinput = [||
225 -- Pattern bindings containing unlifted types should use an outermost bang pattern.
226 let !(PS (ForeignPtr addr# final) off size) = $$qinput
228 case readWord8OffAddr# (addr# `plusAddr#` i#) 0# realWorld# of
230 -- TODO: use keepAlive# ?
231 case touch# final s' of
232 _ -> (# W8# x, i + 1 #)
233 in (# off, (< size), next #)
235 instance Inputable BSL.ByteString where
236 type Cursor BSL.ByteString = UnpackedLazyByteString
237 type InputToken BSL.ByteString = Char
238 cursorOf qinput = [||
239 let next (UnpackedLazyByteString i addr# final off@(I# off#) size cs) =
240 case readWord8OffAddr# addr# off# realWorld# of
242 -- TODO: use keepAlive# ?
243 case touch# final s' of
245 (# C# (chr# (word2Int# x))
246 , if size /= 1 then UnpackedLazyByteString (i+1) addr# final (off+1) (size-1) cs
248 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
249 UnpackedLazyByteString (i+1) addr'# final' off' size' cs'
250 BSL.Empty -> emptyUnpackedLazyByteString (i+1)
252 more (UnpackedLazyByteString _ _ _ _ 0 _) = False
254 init = case $$qinput of
255 BSL.Chunk (PS (ForeignPtr addr# final) off size) cs ->
256 UnpackedLazyByteString 0 addr# final off size cs
257 BSL.Empty -> emptyUnpackedLazyByteString 0
258 in (# init, more, next #)
262 instance Inputable Text16 where
263 type Cursor Text16 = Int
264 cursorOf qinput = [||
265 let Text16 (Text arr off size) = $$qinput
268 (# C# (chr# (word2Int# (indexWord16Array# arr# i#)))
270 in (# off, (< size), next #)
272 instance Inputable CharList where
273 type Cursor CharList = OffWith String
274 cursorOf qinput = [||
275 let CharList input = $$qinput
276 next (OffWith i (c:cs)) = (# c, OffWith (i+1) cs #)
277 size = List.length input
278 more (OffWith i _) = i < size
279 --more (OffWith _ []) = False
281 in (# $$offWith input, more, next #)
283 instance Inputable Stream where
284 type Cursor Stream = OffWith Stream
285 cursorOf qinput = [||
286 let next (OffWith o (c :> cs)) = (# c, OffWith (o + 1) cs #)
287 in (# $$offWith $$qinput, const True, next #)
290 offWith :: CodeQ (ts -> OffWith ts)
291 offWith = [|| OffWith 0 ||]
294 -- type instance Cursor CacheText = (Text, Stream)
295 -- type instance Cursor BSL.ByteString = OffWith BSL.ByteString