]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Input.hs
machine: add horizon optimization
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Input.hs
1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE MagicHash #-}
3 {-# LANGUAGE TemplateHaskell #-}
4 {-# LANGUAGE UnboxedTuples #-}
5 module Symantic.Parser.Machine.Input where
6
7 import Data.Array.Base (UArray(..), listArray)
8 -- import Data.Array.Unboxed (UArray)
9 import Data.Bool
10 import Data.ByteString.Internal (ByteString(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Function (on)
14 import Data.Int (Int)
15 import Data.Kind (Type)
16 import Data.Ord (Ord(..), Ordering)
17 import Data.String (String)
18 import Data.Text ()
19 import Data.Text.Array ({-aBA, empty-})
20 import Data.Text.Internal (Text(..))
21 import Data.Text.Unsafe (iter, Iter(..), iter_, reverseIter_)
22 import Text.Show (Show(..))
23 import GHC.Exts (Int(..), Char(..){-, RuntimeRep(..)-})
24 import GHC.ForeignPtr (ForeignPtr(..), ForeignPtrContents)
25 import GHC.Prim ({-Int#,-} Addr#, nullAddr#, indexWideCharArray#, {-indexWord16Array#,-} readWord8OffAddr#, word2Int#, chr#, touch#, realWorld#, plusAddr#, (+#))
26 import Language.Haskell.TH (CodeQ)
27 import Prelude ((+), (-), error)
28 import qualified Data.ByteString.Lazy.Internal as BSL
29 import qualified Data.List as List
30
31 -- * Class 'Cursorable'
32 class Show cur => Cursorable cur where
33 offset :: cur -> Int
34 compareOffset :: CodeQ (cur -> cur -> Ordering)
35 compareOffset = [|| compare `on` offset ||]
36 lowerOffset :: CodeQ (cur -> cur -> Bool)
37 sameOffset :: CodeQ (cur -> cur -> Bool)
38 shiftRight :: CodeQ (Int -> cur -> cur)
39 instance Cursorable Int where
40 offset = \inp -> inp
41 compareOffset = [|| compare @Int ||]
42 lowerOffset = [|| (<) @Int ||]
43 sameOffset = [|| (==) @Int ||]
44 shiftRight = [|| (+) @Int ||]
45 instance Cursorable Text where
46 offset = \(Text _ i _) -> i
47 lowerOffset = [|| \(Text _ i _) (Text _ j _) -> i < j ||]
48 sameOffset = [|| \(Text _ i _) (Text _ j _) -> i == j ||]
49 shiftRight = [||shiftRightText||]
50
51 shiftRightText :: Int -> Text -> Text
52 shiftRightText i (Text arr off unconsumed) = go i off unconsumed
53 where
54 go 0 off' unconsumed' = Text arr off' unconsumed'
55 go n off' unconsumed'
56 | unconsumed' > 0
57 , !d <- iter_ (Text arr off' unconsumed') 0
58 = go (n-1) (off'+d) (unconsumed'-d)
59 | otherwise = Text arr off' unconsumed'
60
61 shiftLeftText :: Int -> Text -> Text
62 shiftLeftText i (Text arr off unconsumed) = go i off unconsumed
63 where
64 go 0 off' unconsumed' = Text arr off' unconsumed'
65 go n off' unconsumed'
66 | off' > 0
67 , !d <- reverseIter_ (Text arr off' unconsumed') 0
68 = go (n-1) (off'+d) (unconsumed'-d)
69 | otherwise = Text arr off' unconsumed'
70
71 instance Cursorable UnpackedLazyByteString where
72 offset = \(UnpackedLazyByteString i _ _ _ _ _) -> i
73 lowerOffset = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i <= j||]
74 sameOffset = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i == j||]
75 shiftRight = [||shiftRightByteString||]
76
77 shiftRightByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
78 shiftRightByteString j !(UnpackedLazyByteString i addr# final off size cs)
79 | j < size = UnpackedLazyByteString (i + j) addr# final (off + j) (size - j) cs
80 | otherwise = case cs of
81 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
82 shiftRightByteString (j - size)
83 (UnpackedLazyByteString (i + size) addr'# final' off' size' cs')
84 BSL.Empty -> emptyUnpackedLazyByteString (i + size)
85
86 shiftLeftByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
87 shiftLeftByteString j (UnpackedLazyByteString i addr# final off size cs) =
88 UnpackedLazyByteString (i - d) addr# final (off - d) (size + d) cs
89 where d = min off j
90
91 offWith :: CodeQ (ts -> OffWith ts)
92 offWith = [|| OffWith 0 ||]
93
94 -- ** Type 'Text16'
95 newtype Text16 = Text16 Text
96 --newtype CacheText = CacheText Text
97 -- ** Type 'CharList'
98 newtype CharList = CharList String
99 -- ** Type 'Stream'
100 data Stream = {-# UNPACK #-} !Char :> Stream
101 nomore :: Stream
102 nomore = '\0' :> nomore
103 {-
104 instance Cursorable (OffWith Stream) where
105 lowerOffset = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
106 sameOffset = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
107 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (dropStream i ts) ||]
108 where
109 dropStream :: Int -> Stream -> Stream
110 dropStream 0 cs = cs
111 dropStream n (_ :> cs) = dropStream (n-1) cs
112 -}
113
114 -- ** Type 'OffWith'
115 data OffWith ts = OffWith {-# UNPACK #-} !Int ts
116 deriving (Show)
117
118 instance Cursorable (OffWith String) where
119 offset = \(OffWith i _) -> i
120 lowerOffset = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
121 sameOffset = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
122 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (List.drop i ts) ||]
123
124 -- ** Type 'OffWithStreamAnd'
125 data OffWithStreamAnd ts = OffWithStreamAnd {-# UNPACK #-} !Int !Stream ts
126 -- ** Type 'UnpackedLazyByteString'
127 data UnpackedLazyByteString = UnpackedLazyByteString
128 {-# UNPACK #-} !Int
129 !Addr#
130 ForeignPtrContents
131 {-# UNPACK #-} !Int
132 {-# UNPACK #-} !Int
133 BSL.ByteString
134 instance Show UnpackedLazyByteString where
135 show (UnpackedLazyByteString _i _addr _p _off _size _cs) = "UnpackedLazyByteString" -- FIXME
136
137 {-# INLINE emptyUnpackedLazyByteString #-}
138 emptyUnpackedLazyByteString :: Int -> UnpackedLazyByteString
139 emptyUnpackedLazyByteString i =
140 UnpackedLazyByteString i nullAddr#
141 (error "nullForeignPtr") 0 0 BSL.Empty
142
143 -- * Class 'Input'
144 class Cursorable (Cursor inp) => Input inp where
145 type Cursor inp :: Type
146 type InputToken inp :: Type
147 cursorOf :: CodeQ inp -> CodeQ
148 (# {-init-} Cursor inp
149 , {-more-} Cursor inp -> Bool
150 , {-next-} Cursor inp -> (# InputToken inp, Cursor inp #)
151 #)
152
153 instance Input String where
154 type Cursor String = Int
155 type InputToken String = Char
156 cursorOf input = cursorOf @(UArray Int Char)
157 [|| listArray (0, List.length $$input-1) $$input ||]
158 instance Input (UArray Int Char) where
159 type Cursor (UArray Int Char) = Int
160 type InputToken (UArray Int Char) = Char
161 cursorOf qinput = [||
162 let UArray _ _ size input# = $$qinput
163 next (I# i#) =
164 (# C# (indexWideCharArray# input# i#)
165 , I# (i# +# 1#)
166 #)
167 in (# 0, (< size), next #)
168 ||]
169 instance Input Text where
170 type Cursor Text = Text
171 type InputToken Text = Char
172 cursorOf inp = [||
173 let _ = "cursorOf" in
174 let next t@(Text arr off unconsumed) =
175 let !(Iter c d) = iter t 0 in
176 (# c, Text arr (off+d) (unconsumed-d) #)
177 more (Text _ _ unconsumed) = unconsumed > 0
178 in (# $$inp, more, next #)
179 ||]
180 instance Input ByteString where
181 type Cursor ByteString = Int
182 type InputToken ByteString = Char
183 cursorOf qinput = [||
184 let PS (ForeignPtr addr# final) off size = $$qinput
185 next i@(I# i#) =
186 case readWord8OffAddr# (addr# `plusAddr#` i#) 0# realWorld# of
187 (# s', x #) -> case touch# final s' of
188 _ -> (# C# (chr# (word2Int# x)), i + 1 #)
189 in (# off, (< size), next #)
190 ||]
191 instance Input BSL.ByteString where
192 type Cursor BSL.ByteString = UnpackedLazyByteString
193 type InputToken BSL.ByteString = Char
194 cursorOf qinput = [||
195 let next (UnpackedLazyByteString i addr# final off@(I# off#) size cs) =
196 case readWord8OffAddr# addr# off# realWorld# of
197 (# s', x #) -> case touch# final s' of
198 _ ->
199 (# C# (chr# (word2Int# x))
200 , if size /= 1 then UnpackedLazyByteString (i+1) addr# final (off+1) (size-1) cs
201 else case cs of
202 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
203 UnpackedLazyByteString (i+1) addr'# final' off' size' cs'
204 BSL.Empty -> emptyUnpackedLazyByteString (i+1)
205 #)
206 more (UnpackedLazyByteString _ _ _ _ 0 _) = False
207 more _ = True
208 init = case $$qinput of
209 BSL.Chunk (PS (ForeignPtr addr# final) off size) cs ->
210 UnpackedLazyByteString 0 addr# final off size cs
211 BSL.Empty -> emptyUnpackedLazyByteString 0
212 in (# init, more, next #)
213 ||]
214 {-
215 instance Input Text16 where
216 type Cursor Text16 = Int
217 cursorOf qinput = [||
218 let Text16 (Text arr off size) = $$qinput
219 arr# = aBA arr
220 next (I# i#) =
221 (# C# (chr# (word2Int# (indexWord16Array# arr# i#)))
222 , I# (i# +# 1#) #)
223 in (# off, (< size), next #)
224 ||]
225 instance Input CharList where
226 type Cursor CharList = OffWith String
227 cursorOf qinput = [||
228 let CharList input = $$qinput
229 next (OffWith i (c:cs)) = (# c, OffWith (i+1) cs #)
230 size = List.length input
231 more (OffWith i _) = i < size
232 --more (OffWith _ []) = False
233 --more _ = True
234 in (# $$offWith input, more, next #)
235 ||]
236 instance Input Stream where
237 type Cursor Stream = OffWith Stream
238 cursorOf qinput = [||
239 let next (OffWith o (c :> cs)) = (# c, OffWith (o + 1) cs #)
240 in (# $$offWith $$qinput, const True, next #)
241 ||]
242 -}
243 {-
244 -- type instance Cursor CacheText = (Text, Stream)
245 -- type instance Cursor BSL.ByteString = OffWith BSL.ByteString
246 -}