1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE MagicHash #-}
3 {-# LANGUAGE TemplateHaskell #-}
4 {-# LANGUAGE UnboxedTuples #-}
5 module Symantic.Parser.Machine.Input where
7 import Data.Array.Base (UArray(..), listArray)
8 -- import Data.Array.Unboxed (UArray)
10 import Data.ByteString.Internal (ByteString(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Function (on)
15 import Data.Kind (Type)
16 import Data.Ord (Ord(..), Ordering)
17 import Data.String (String)
19 import Data.Text.Array ({-aBA, empty-})
20 import Data.Text.Internal (Text(..))
21 import Data.Text.Unsafe (iter, Iter(..), iter_, reverseIter_)
22 import Text.Show (Show(..))
23 import GHC.Exts (Int(..), Char(..){-, RuntimeRep(..)-})
24 import GHC.ForeignPtr (ForeignPtr(..), ForeignPtrContents)
25 import GHC.Prim ({-Int#,-} Addr#, nullAddr#, indexWideCharArray#, {-indexWord16Array#,-} readWord8OffAddr#, word2Int#, chr#, touch#, realWorld#, plusAddr#, (+#))
26 import Language.Haskell.TH (CodeQ)
27 import Prelude ((+), (-), error)
28 import qualified Data.ByteString.Lazy.Internal as BSL
29 import qualified Data.List as List
31 -- * Class 'Cursorable'
32 class Show cur => Cursorable cur where
34 compareOffset :: CodeQ (cur -> cur -> Ordering)
35 compareOffset = [|| compare `on` offset ||]
36 lowerOffset :: CodeQ (cur -> cur -> Bool)
37 sameOffset :: CodeQ (cur -> cur -> Bool)
38 shiftRight :: CodeQ (Int -> cur -> cur)
39 instance Cursorable Int where
41 compareOffset = [|| compare @Int ||]
42 lowerOffset = [|| (<) @Int ||]
43 sameOffset = [|| (==) @Int ||]
44 shiftRight = [|| (+) @Int ||]
45 instance Cursorable Text where
46 offset = \(Text _ i _) -> i
47 lowerOffset = [|| \(Text _ i _) (Text _ j _) -> i < j ||]
48 sameOffset = [|| \(Text _ i _) (Text _ j _) -> i == j ||]
49 shiftRight = [||shiftRightText||]
51 shiftRightText :: Int -> Text -> Text
52 shiftRightText i (Text arr off unconsumed) = go i off unconsumed
54 go 0 off' unconsumed' = Text arr off' unconsumed'
57 , !d <- iter_ (Text arr off' unconsumed') 0
58 = go (n-1) (off'+d) (unconsumed'-d)
59 | otherwise = Text arr off' unconsumed'
61 shiftLeftText :: Int -> Text -> Text
62 shiftLeftText i (Text arr off unconsumed) = go i off unconsumed
64 go 0 off' unconsumed' = Text arr off' unconsumed'
67 , !d <- reverseIter_ (Text arr off' unconsumed') 0
68 = go (n-1) (off'+d) (unconsumed'-d)
69 | otherwise = Text arr off' unconsumed'
71 instance Cursorable UnpackedLazyByteString where
72 offset = \(UnpackedLazyByteString i _ _ _ _ _) -> i
73 lowerOffset = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i <= j||]
74 sameOffset = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i == j||]
75 shiftRight = [||shiftRightByteString||]
77 shiftRightByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
78 shiftRightByteString j !(UnpackedLazyByteString i addr# final off size cs)
79 | j < size = UnpackedLazyByteString (i + j) addr# final (off + j) (size - j) cs
80 | otherwise = case cs of
81 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
82 shiftRightByteString (j - size)
83 (UnpackedLazyByteString (i + size) addr'# final' off' size' cs')
84 BSL.Empty -> emptyUnpackedLazyByteString (i + size)
86 shiftLeftByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
87 shiftLeftByteString j (UnpackedLazyByteString i addr# final off size cs) =
88 UnpackedLazyByteString (i - d) addr# final (off - d) (size + d) cs
91 offWith :: CodeQ (ts -> OffWith ts)
92 offWith = [|| OffWith 0 ||]
95 newtype Text16 = Text16 Text
96 --newtype CacheText = CacheText Text
98 newtype CharList = CharList String
100 data Stream = {-# UNPACK #-} !Char :> Stream
102 nomore = '\0' :> nomore
104 instance Cursorable (OffWith Stream) where
105 lowerOffset = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
106 sameOffset = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
107 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (dropStream i ts) ||]
109 dropStream :: Int -> Stream -> Stream
111 dropStream n (_ :> cs) = dropStream (n-1) cs
115 data OffWith ts = OffWith {-# UNPACK #-} !Int ts
118 instance Cursorable (OffWith String) where
119 offset = \(OffWith i _) -> i
120 lowerOffset = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
121 sameOffset = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
122 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (List.drop i ts) ||]
124 -- ** Type 'OffWithStreamAnd'
125 data OffWithStreamAnd ts = OffWithStreamAnd {-# UNPACK #-} !Int !Stream ts
126 -- ** Type 'UnpackedLazyByteString'
127 data UnpackedLazyByteString = UnpackedLazyByteString
134 instance Show UnpackedLazyByteString where
135 show (UnpackedLazyByteString _i _addr _p _off _size _cs) = "UnpackedLazyByteString" -- FIXME
137 {-# INLINE emptyUnpackedLazyByteString #-}
138 emptyUnpackedLazyByteString :: Int -> UnpackedLazyByteString
139 emptyUnpackedLazyByteString i =
140 UnpackedLazyByteString i nullAddr#
141 (error "nullForeignPtr") 0 0 BSL.Empty
144 class Cursorable (Cursor inp) => Input inp where
145 type Cursor inp :: Type
146 type InputToken inp :: Type
147 cursorOf :: CodeQ inp -> CodeQ
148 (# {-init-} Cursor inp
149 , {-more-} Cursor inp -> Bool
150 , {-next-} Cursor inp -> (# InputToken inp, Cursor inp #)
153 instance Input String where
154 type Cursor String = Int
155 type InputToken String = Char
156 cursorOf input = cursorOf @(UArray Int Char)
157 [|| listArray (0, List.length $$input-1) $$input ||]
158 instance Input (UArray Int Char) where
159 type Cursor (UArray Int Char) = Int
160 type InputToken (UArray Int Char) = Char
161 cursorOf qinput = [||
162 let UArray _ _ size input# = $$qinput
164 (# C# (indexWideCharArray# input# i#)
167 in (# 0, (< size), next #)
169 instance Input Text where
170 type Cursor Text = Text
171 type InputToken Text = Char
173 let _ = "cursorOf" in
174 let next t@(Text arr off unconsumed) =
175 let !(Iter c d) = iter t 0 in
176 (# c, Text arr (off+d) (unconsumed-d) #)
177 more (Text _ _ unconsumed) = unconsumed > 0
178 in (# $$inp, more, next #)
180 instance Input ByteString where
181 type Cursor ByteString = Int
182 type InputToken ByteString = Char
183 cursorOf qinput = [||
184 let PS (ForeignPtr addr# final) off size = $$qinput
186 case readWord8OffAddr# (addr# `plusAddr#` i#) 0# realWorld# of
187 (# s', x #) -> case touch# final s' of
188 _ -> (# C# (chr# (word2Int# x)), i + 1 #)
189 in (# off, (< size), next #)
191 instance Input BSL.ByteString where
192 type Cursor BSL.ByteString = UnpackedLazyByteString
193 type InputToken BSL.ByteString = Char
194 cursorOf qinput = [||
195 let next (UnpackedLazyByteString i addr# final off@(I# off#) size cs) =
196 case readWord8OffAddr# addr# off# realWorld# of
197 (# s', x #) -> case touch# final s' of
199 (# C# (chr# (word2Int# x))
200 , if size /= 1 then UnpackedLazyByteString (i+1) addr# final (off+1) (size-1) cs
202 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
203 UnpackedLazyByteString (i+1) addr'# final' off' size' cs'
204 BSL.Empty -> emptyUnpackedLazyByteString (i+1)
206 more (UnpackedLazyByteString _ _ _ _ 0 _) = False
208 init = case $$qinput of
209 BSL.Chunk (PS (ForeignPtr addr# final) off size) cs ->
210 UnpackedLazyByteString 0 addr# final off size cs
211 BSL.Empty -> emptyUnpackedLazyByteString 0
212 in (# init, more, next #)
215 instance Input Text16 where
216 type Cursor Text16 = Int
217 cursorOf qinput = [||
218 let Text16 (Text arr off size) = $$qinput
221 (# C# (chr# (word2Int# (indexWord16Array# arr# i#)))
223 in (# off, (< size), next #)
225 instance Input CharList where
226 type Cursor CharList = OffWith String
227 cursorOf qinput = [||
228 let CharList input = $$qinput
229 next (OffWith i (c:cs)) = (# c, OffWith (i+1) cs #)
230 size = List.length input
231 more (OffWith i _) = i < size
232 --more (OffWith _ []) = False
234 in (# $$offWith input, more, next #)
236 instance Input Stream where
237 type Cursor Stream = OffWith Stream
238 cursorOf qinput = [||
239 let next (OffWith o (c :> cs)) = (# c, OffWith (o + 1) cs #)
240 in (# $$offWith $$qinput, const True, next #)
244 -- type instance Cursor CacheText = (Text, Stream)
245 -- type instance Cursor BSL.ByteString = OffWith BSL.ByteString