]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Input.hs
add benchmarks
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Input.hs
1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE MagicHash #-}
3 {-# LANGUAGE TemplateHaskell #-}
4 {-# LANGUAGE UnboxedTuples #-}
5 module Symantic.Parser.Machine.Input where
6
7 import Data.Array.Base (UArray(..), listArray)
8 -- import Data.Array.Unboxed (UArray)
9 import Data.Bool
10 import Data.ByteString.Internal (ByteString(..))
11 import Data.Char (Char)
12 import Data.Word (Word8)
13 import Data.Eq (Eq(..))
14 import Data.Function (on)
15 import Data.Int (Int)
16 import Data.Kind (Type)
17 import Data.Ord (Ord(..), Ordering)
18 import Data.String (String)
19 import Data.Text ()
20 import Data.Text.Array ({-aBA, empty-})
21 import Data.Text.Internal (Text(..))
22 import Data.Text.Unsafe (iter, Iter(..), iter_, reverseIter_)
23 import Text.Show (Show(..))
24 import GHC.Exts (Int(..), Char(..), {-, RuntimeRep(..)-})
25 import GHC.Word (Word8(..))
26 import GHC.ForeignPtr (ForeignPtr(..), ForeignPtrContents)
27 import GHC.Prim ({-Int#,-} Addr#, nullAddr#, indexWideCharArray#, {-indexWord16Array#,-} readWord8OffAddr#, word2Int#, chr#, touch#, realWorld#, plusAddr#, (+#))
28 import Language.Haskell.TH (CodeQ)
29 import Prelude ((+), (-), error)
30 import qualified Data.ByteString.Lazy.Internal as BSL
31 import qualified Data.List as List
32
33 -- * Class 'Cursorable'
34 class Show cur => Cursorable cur where
35 offset :: cur -> Int
36 compareOffset :: CodeQ (cur -> cur -> Ordering)
37 compareOffset = [|| compare `on` offset ||]
38 lowerOffset :: CodeQ (cur -> cur -> Bool)
39 sameOffset :: CodeQ (cur -> cur -> Bool)
40 shiftRight :: CodeQ (Int -> cur -> cur)
41 instance Cursorable Int where
42 offset = \inp -> inp
43 compareOffset = [|| compare @Int ||]
44 lowerOffset = [|| (<) @Int ||]
45 sameOffset = [|| (==) @Int ||]
46 shiftRight = [|| (+) @Int ||]
47 instance Cursorable Text where
48 offset = \(Text _ i _) -> i
49 lowerOffset = [|| \(Text _ i _) (Text _ j _) -> i < j ||]
50 sameOffset = [|| \(Text _ i _) (Text _ j _) -> i == j ||]
51 shiftRight = [||shiftRightText||]
52
53 shiftRightText :: Int -> Text -> Text
54 shiftRightText i (Text arr off unconsumed) = go i off unconsumed
55 where
56 go 0 off' unconsumed' = Text arr off' unconsumed'
57 go n off' unconsumed'
58 | unconsumed' > 0
59 , !d <- iter_ (Text arr off' unconsumed') 0
60 = go (n-1) (off'+d) (unconsumed'-d)
61 | otherwise = Text arr off' unconsumed'
62
63 shiftLeftText :: Int -> Text -> Text
64 shiftLeftText i (Text arr off unconsumed) = go i off unconsumed
65 where
66 go 0 off' unconsumed' = Text arr off' unconsumed'
67 go n off' unconsumed'
68 | off' > 0
69 , !d <- reverseIter_ (Text arr off' unconsumed') 0
70 = go (n-1) (off'+d) (unconsumed'-d)
71 | otherwise = Text arr off' unconsumed'
72
73 instance Cursorable UnpackedLazyByteString where
74 offset = \(UnpackedLazyByteString i _ _ _ _ _) -> i
75 lowerOffset = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i <= j||]
76 sameOffset = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i == j||]
77 shiftRight = [||shiftRightByteString||]
78
79 shiftRightByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
80 shiftRightByteString j !(UnpackedLazyByteString i addr# final off size cs)
81 | j < size = UnpackedLazyByteString (i + j) addr# final (off + j) (size - j) cs
82 | otherwise = case cs of
83 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
84 shiftRightByteString (j - size)
85 (UnpackedLazyByteString (i + size) addr'# final' off' size' cs')
86 BSL.Empty -> emptyUnpackedLazyByteString (i + size)
87
88 shiftLeftByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
89 shiftLeftByteString j !(UnpackedLazyByteString i addr# final off size cs) =
90 UnpackedLazyByteString (i - d) addr# final (off - d) (size + d) cs
91 where d = min off j
92
93 offWith :: CodeQ (ts -> OffWith ts)
94 offWith = [|| OffWith 0 ||]
95
96 -- ** Type 'Text16'
97 newtype Text16 = Text16 Text
98 --newtype CacheText = CacheText Text
99 -- ** Type 'CharList'
100 newtype CharList = CharList String
101 -- ** Type 'Stream'
102 data Stream = {-# UNPACK #-} !Char :> Stream
103 nomore :: Stream
104 nomore = '\0' :> nomore
105 {-
106 instance Cursorable (OffWith Stream) where
107 lowerOffset = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
108 sameOffset = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
109 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (dropStream i ts) ||]
110 where
111 dropStream :: Int -> Stream -> Stream
112 dropStream 0 cs = cs
113 dropStream n (_ :> cs) = dropStream (n-1) cs
114 -}
115
116 -- ** Type 'OffWith'
117 data OffWith ts = OffWith {-# UNPACK #-} !Int ts
118 deriving (Show)
119
120 instance Cursorable (OffWith String) where
121 offset = \(OffWith i _) -> i
122 lowerOffset = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
123 sameOffset = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
124 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (List.drop i ts) ||]
125
126 -- ** Type 'OffWithStreamAnd'
127 data OffWithStreamAnd ts = OffWithStreamAnd {-# UNPACK #-} !Int !Stream ts
128 -- ** Type 'UnpackedLazyByteString'
129 data UnpackedLazyByteString = UnpackedLazyByteString
130 {-# UNPACK #-} !Int
131 !Addr#
132 ForeignPtrContents
133 {-# UNPACK #-} !Int
134 {-# UNPACK #-} !Int
135 BSL.ByteString
136 instance Show UnpackedLazyByteString where
137 show (UnpackedLazyByteString _i _addr _p _off _size _cs) = "UnpackedLazyByteString" -- FIXME
138
139 {-# INLINE emptyUnpackedLazyByteString #-}
140 emptyUnpackedLazyByteString :: Int -> UnpackedLazyByteString
141 emptyUnpackedLazyByteString i =
142 UnpackedLazyByteString i nullAddr#
143 (error "nullForeignPtr") 0 0 BSL.Empty
144
145 -- * Class 'Inputable'
146 class Cursorable (Cursor inp) => Inputable inp where
147 type Cursor inp :: Type
148 type InputToken inp :: Type
149 cursorOf :: CodeQ inp -> CodeQ
150 (# {-init-} Cursor inp
151 , {-more-} Cursor inp -> Bool
152 , {-next-} Cursor inp -> (# InputToken inp, Cursor inp #)
153 #)
154
155 instance Inputable String where
156 type Cursor String = Int
157 type InputToken String = Char
158 cursorOf input = cursorOf @(UArray Int Char)
159 [|| listArray (0, List.length $$input-1) $$input ||]
160 instance Inputable (UArray Int Char) where
161 type Cursor (UArray Int Char) = Int
162 type InputToken (UArray Int Char) = Char
163 cursorOf qinput = [||
164 -- Pattern bindings containing unlifted types should use an outermost bang pattern.
165 let !(UArray _ _ size input#) = $$qinput
166 next (I# i#) =
167 (# C# (indexWideCharArray# input# i#)
168 , I# (i# +# 1#)
169 #)
170 in (# 0, (< size), next #)
171 ||]
172 instance Inputable Text where
173 type Cursor Text = Text
174 type InputToken Text = Char
175 cursorOf inp = [||
176 let _ = "cursorOf" in
177 let next t@(Text arr off unconsumed) =
178 let !(Iter c d) = iter t 0 in
179 (# c, Text arr (off+d) (unconsumed-d) #)
180 more (Text _ _ unconsumed) = unconsumed > 0
181 in (# $$inp, more, next #)
182 ||]
183 instance Inputable ByteString where
184 type Cursor ByteString = Int
185 type InputToken ByteString = Word8
186 cursorOf qinput = [||
187 -- Pattern bindings containing unlifted types should use an outermost bang pattern.
188 let !(PS (ForeignPtr addr# final) off size) = $$qinput
189 next i@(I# i#) =
190 case readWord8OffAddr# (addr# `plusAddr#` i#) 0# realWorld# of
191 (# s', x #) -> case touch# final s' of
192 _ -> (# W8# (x), i + 1 #)
193 in (# off, (< size), next #)
194 ||]
195 instance Inputable BSL.ByteString where
196 type Cursor BSL.ByteString = UnpackedLazyByteString
197 type InputToken BSL.ByteString = Char
198 cursorOf qinput = [||
199 let next (UnpackedLazyByteString i addr# final off@(I# off#) size cs) =
200 case readWord8OffAddr# addr# off# realWorld# of
201 (# s', x #) -> case touch# final s' of
202 _ ->
203 (# C# (chr# (word2Int# x))
204 , if size /= 1 then UnpackedLazyByteString (i+1) addr# final (off+1) (size-1) cs
205 else case cs of
206 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
207 UnpackedLazyByteString (i+1) addr'# final' off' size' cs'
208 BSL.Empty -> emptyUnpackedLazyByteString (i+1)
209 #)
210 more (UnpackedLazyByteString _ _ _ _ 0 _) = False
211 more _ = True
212 init = case $$qinput of
213 BSL.Chunk (PS (ForeignPtr addr# final) off size) cs ->
214 UnpackedLazyByteString 0 addr# final off size cs
215 BSL.Empty -> emptyUnpackedLazyByteString 0
216 in (# init, more, next #)
217 ||]
218 {-
219 instance Inputable Text16 where
220 type Cursor Text16 = Int
221 cursorOf qinput = [||
222 let Text16 (Text arr off size) = $$qinput
223 arr# = aBA arr
224 next (I# i#) =
225 (# C# (chr# (word2Int# (indexWord16Array# arr# i#)))
226 , I# (i# +# 1#) #)
227 in (# off, (< size), next #)
228 ||]
229 instance Inputable CharList where
230 type Cursor CharList = OffWith String
231 cursorOf qinput = [||
232 let CharList input = $$qinput
233 next (OffWith i (c:cs)) = (# c, OffWith (i+1) cs #)
234 size = List.length input
235 more (OffWith i _) = i < size
236 --more (OffWith _ []) = False
237 --more _ = True
238 in (# $$offWith input, more, next #)
239 ||]
240 instance Inputable Stream where
241 type Cursor Stream = OffWith Stream
242 cursorOf qinput = [||
243 let next (OffWith o (c :> cs)) = (# c, OffWith (o + 1) cs #)
244 in (# $$offWith $$qinput, const True, next #)
245 ||]
246 -}
247 {-
248 -- type instance Cursor CacheText = (Text, Stream)
249 -- type instance Cursor BSL.ByteString = OffWith BSL.ByteString
250 -}
251