]> Git — Sourcephile - haskell/symantic-parser.git/blob - src/Symantic/Parser/Machine/Input.hs
impl: update to text-2
[haskell/symantic-parser.git] / src / Symantic / Parser / Machine / Input.hs
1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE MagicHash #-}
3 {-# LANGUAGE TemplateHaskell #-}
4 {-# LANGUAGE UnboxedTuples #-}
5 {-# LANGUAGE PolyKinds #-}
6 module Symantic.Parser.Machine.Input where
7
8 import Data.Array.Base (UArray(..), listArray)
9 -- import Data.Array.Unboxed (UArray)
10 import Data.Bool
11 import Data.ByteString.Internal (ByteString(..))
12 import Data.Char (Char)
13 import Data.Word (Word8)
14 import Data.Eq (Eq(..))
15 import Data.Function (on)
16 import Data.Int (Int)
17 import Data.Kind (Type)
18 import Data.Ord (Ord(..), Ordering)
19 import Data.String (String)
20 import Data.Text ()
21 import Data.Text.Array ({-aBA, empty-})
22 import Data.Text.Internal (Text(..))
23 --import Data.Text.Unsafe (iter, Iter(..), iter_, reverseIter_)
24 import qualified Data.Text.Unsafe as Text.Unsafe
25 import Text.Show (Show(..))
26 import GHC.Exts (Int(..), Char(..) {-, RuntimeRep(..)-}, TYPE)
27 import GHC.Word (Word8(..))
28 import GHC.ForeignPtr (ForeignPtr(..), ForeignPtrContents)
29 import GHC.Prim ({-Int#,-} Addr#, nullAddr#, indexWideCharArray#, {-indexWord16Array#,-} readWord8OffAddr#, word2Int#, chr#, touch#, realWorld#, plusAddr#, (+#))
30 import Language.Haskell.TH (CodeQ)
31 import Prelude ((+), (-), error)
32 import qualified Data.ByteString.Lazy.Internal as BSL
33 import qualified Data.List as List
34 import qualified Data.Text as Text
35 import qualified Symantic.Parser.Machine.Input.Text.Buffer as Text.Buffer
36
37 -- * Class 'Inputable'
38 class Positionable (InputPosition inp) => Inputable inp where
39 type InputBuffer inp :: Type
40 type InputPosition inp :: Type
41 type InputToken inp :: Type
42 cursorOf :: CodeQ inp -> CodeQ (Cursor inp)
43 nullInput :: inp -> Bool
44
45 -- ** Type 'Cursor'
46 type Cursor (inp :: TYPE r) = -- FIXME: what is that TYPE doing here?
47 (# {-initBuf-} InputBuffer inp
48 , {-initPos-} InputPosition inp
49 , {-more-} InputBuffer inp -> InputPosition inp -> Bool
50 , {-next-} InputBuffer inp -> InputPosition inp -> (# InputToken inp, InputPosition inp #)
51 , {-append-} InputBuffer inp -> inp -> InputBuffer inp #)
52 instance Inputable Text where
53 type InputBuffer Text = Text.Buffer.Buffer
54 type InputPosition Text = Int
55 type InputToken Text = Char
56 nullInput = Text.null
57 cursorOf inp = [||
58 let next buf pos =
59 let !(Text.Unsafe.Iter c d) = Text.Buffer.iter buf pos in
60 (# c, pos+d #)
61 more buf pos = pos < Text.Buffer.length buf
62 append = Text.Buffer.pappend
63 in (# Text.Buffer.buffer $$inp, 0, more, next, append #)
64 ||]
65 {-
66 instance Inputable Text where
67 type InputBuffer Text = Text
68 type InputPosition Text = Text
69 type InputToken Text = Char
70 nullInput = Text.null
71 cursorOf inp = [||
72 let next _buf t@(Text arr off unconsumed) =
73 let !(Text.Unsafe.Iter c d) = Text.Unsafe.iter t 0 in
74 (# c, Text arr (off+d) (unconsumed-d) #)
75 more _buf (Text _ _ unconsumed) = unconsumed > 0
76 in (# $$inp, $$inp, more, next, Text.append #)
77 ||]
78 -}
79
80 -- ** Class 'Positionable'
81 class Show pos => Positionable pos where
82 position :: pos -> Int
83 comparePosition :: CodeQ (pos -> pos -> Ordering)
84 comparePosition = [|| compare `on` position ||]
85 lowerPosition :: CodeQ (pos -> pos -> Bool)
86 samePosition :: CodeQ (pos -> pos -> Bool)
87 shiftRight :: CodeQ (Int -> pos -> pos)
88 instance Positionable Int where
89 position = \inp -> inp
90 comparePosition = [|| compare @Int ||]
91 lowerPosition = [|| (<) @Int ||]
92 samePosition = [|| (==) @Int ||]
93 shiftRight = [|| (+) @Int ||]
94 instance Positionable Text where
95 position = \(Text _ i _) -> i
96 comparePosition = [|| \(Text _ i _) (Text _ j _) -> i `compare` j ||]
97 lowerPosition = [|| \(Text _ i _) (Text _ j _) -> i < j ||]
98 samePosition = [|| \(Text _ i _) (Text _ j _) -> i == j ||]
99 shiftRight = [||shiftRightText||]
100
101 shiftRightText :: Int -> Text -> Text
102 shiftRightText i (Text arr off unconsumed) = go i off unconsumed
103 where
104 go 0 off' unconsumed' = Text arr off' unconsumed'
105 go n off' unconsumed'
106 | unconsumed' > 0
107 , !d <- Text.Unsafe.iter_ (Text arr off' unconsumed') 0
108 = go (n-1) (off'+d) (unconsumed'-d)
109 | otherwise = Text arr off' unconsumed'
110
111 {-
112 shiftLeftText :: Int -> Text -> Text
113 shiftLeftText i (Text arr off unconsumed) = go i off unconsumed
114 where
115 go 0 off' unconsumed' = Text arr off' unconsumed'
116 go n off' unconsumed'
117 | off' > 0
118 , !d <- Text.Unsafe.reverseIter_ (Text arr off' unconsumed') 0
119 = go (n-1) (off'+d) (unconsumed'-d)
120 | otherwise = Text arr off' unconsumed'
121
122 instance Positionable UnpackedLazyByteString where
123 position = \(UnpackedLazyByteString i _ _ _ _ _) -> i
124 lowerPosition = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i <= j||]
125 samePosition = [||\(UnpackedLazyByteString i _ _ _ _ _) (UnpackedLazyByteString j _ _ _ _ _) -> i == j||]
126 shiftRight = [||shiftRightByteString||]
127
128 shiftRightByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
129 shiftRightByteString j (UnpackedLazyByteString i addr# final off size cs)
130 | j < size = UnpackedLazyByteString (i + j) addr# final (off + j) (size - j) cs
131 | otherwise = case cs of
132 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
133 shiftRightByteString (j - size)
134 (UnpackedLazyByteString (i + size) addr'# final' off' size' cs')
135 BSL.Empty -> emptyUnpackedLazyByteString (i + size)
136
137 shiftLeftByteString :: Int -> UnpackedLazyByteString -> UnpackedLazyByteString
138 shiftLeftByteString j (UnpackedLazyByteString i addr# final off size cs) =
139 UnpackedLazyByteString (i - d) addr# final (off - d) (size + d) cs
140 where d = min off j
141 -}
142 -- ** Type 'Text16'
143 newtype Text16 = Text16 Text
144 --newtype CacheText = CacheText Text
145 -- ** Type 'CharList'
146 newtype CharList = CharList String
147 -- ** Type 'Stream'
148 data Stream = {-# UNPACK #-} !Char :> Stream
149 nomore :: Stream
150 nomore = '\0' :> nomore
151 {-
152 instance Positionable (OffWith Stream) where
153 lowerPosition = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
154 samePosition = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
155 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (dropStream i ts) ||]
156 where
157 dropStream :: Int -> Stream -> Stream
158 dropStream 0 cs = cs
159 dropStream n (_ :> cs) = dropStream (n-1) cs
160
161 -- ** Type 'OffWith'
162 data OffWith ts = OffWith {-# UNPACK #-} !Int ts
163 deriving (Show)
164
165 instance Positionable (OffWith String) where
166 position = \(OffWith i _) -> i
167 lowerPosition = [|| \(OffWith i _) (OffWith j _) -> i < j ||]
168 samePosition = [|| \(OffWith i _) (OffWith j _) -> i == j ||]
169 shiftRight = [|| \i (OffWith o ts) -> OffWith (o + i) (List.drop i ts) ||]
170
171 -- ** Type 'OffWithStreamAnd'
172 data OffWithStreamAnd ts = OffWithStreamAnd {-# UNPACK #-} !Int !Stream ts
173 -- ** Type 'UnpackedLazyByteString'
174 data UnpackedLazyByteString = UnpackedLazyByteString
175 {-# UNPACK #-} !Int
176 !Addr#
177 ForeignPtrContents
178 {-# UNPACK #-} !Int
179 {-# UNPACK #-} !Int
180 BSL.ByteString
181 instance Show UnpackedLazyByteString where
182 show (UnpackedLazyByteString _i _addr _p _off _size _cs) = "UnpackedLazyByteString" -- FIXME
183
184 {-# INLINE emptyUnpackedLazyByteString #-}
185 emptyUnpackedLazyByteString :: Int -> UnpackedLazyByteString
186 emptyUnpackedLazyByteString i =
187 UnpackedLazyByteString i nullAddr#
188 (error "nullForeignPtr") 0 0 BSL.Empty
189 -}
190
191 {-
192 instance Inputable String where
193 type Cursor String = Int
194 type InputToken String = Char
195 cursorOf input = cursorOf @(UArray Int Char)
196 [|| listArray (0, List.length $$input-1) $$input ||]
197 instance Inputable (UArray Int Char) where
198 type Cursor (UArray Int Char) = Int
199 type InputToken (UArray Int Char) = Char
200 cursorOf qinput = [||
201 -- Pattern bindings containing unlifted types should use an outermost bang pattern.
202 let !(UArray _ _ size input#) = $$qinput
203 next (I# i#) =
204 (# C# (indexWideCharArray# input# i#)
205 , I# (i# +# 1#) #)
206 in (# 0, (< size), next #)
207 ||]
208 instance Inputable Text where
209 type Cursor Text = Text
210 type InputToken Text = Char
211 cursorOf inp = [||
212 let _ = "cursorOf" in
213 let next t@(Text arr off unconsumed) =
214 let !(Text.Unsafe.Iter c d) = Text.Unsafe.iter t 0 in
215 (# c, Text arr (off+d) (unconsumed-d) #)
216 more (Text _ _ unconsumed) = unconsumed > 0
217 in (# $$inp, more, next #)
218 ||]
219 -}
220 {-
221 instance Inputable ByteString where
222 type Cursor ByteString = Int
223 type InputToken ByteString = Word8
224 cursorOf qinput = [||
225 -- Pattern bindings containing unlifted types should use an outermost bang pattern.
226 let !(PS (ForeignPtr addr# final) off size) = $$qinput
227 next i@(I# i#) =
228 case readWord8OffAddr# (addr# `plusAddr#` i#) 0# realWorld# of
229 (# s', x #) ->
230 -- TODO: use keepAlive# ?
231 case touch# final s' of
232 _ -> (# W8# x, i + 1 #)
233 in (# off, (< size), next #)
234 ||]
235 instance Inputable BSL.ByteString where
236 type Cursor BSL.ByteString = UnpackedLazyByteString
237 type InputToken BSL.ByteString = Char
238 cursorOf qinput = [||
239 let next (UnpackedLazyByteString i addr# final off@(I# off#) size cs) =
240 case readWord8OffAddr# addr# off# realWorld# of
241 (# s', x #) ->
242 -- TODO: use keepAlive# ?
243 case touch# final s' of
244 _ ->
245 (# C# (chr# (word2Int# x))
246 , if size /= 1 then UnpackedLazyByteString (i+1) addr# final (off+1) (size-1) cs
247 else case cs of
248 BSL.Chunk (PS (ForeignPtr addr'# final') off' size') cs' ->
249 UnpackedLazyByteString (i+1) addr'# final' off' size' cs'
250 BSL.Empty -> emptyUnpackedLazyByteString (i+1)
251 #)
252 more (UnpackedLazyByteString _ _ _ _ 0 _) = False
253 more _ = True
254 init = case $$qinput of
255 BSL.Chunk (PS (ForeignPtr addr# final) off size) cs ->
256 UnpackedLazyByteString 0 addr# final off size cs
257 BSL.Empty -> emptyUnpackedLazyByteString 0
258 in (# init, more, next #)
259 ||]
260 -}
261 {-
262 instance Inputable Text16 where
263 type Cursor Text16 = Int
264 cursorOf qinput = [||
265 let Text16 (Text arr off size) = $$qinput
266 arr# = aBA arr
267 next (I# i#) =
268 (# C# (chr# (word2Int# (indexWord16Array# arr# i#)))
269 , I# (i# +# 1#) #)
270 in (# off, (< size), next #)
271 ||]
272 instance Inputable CharList where
273 type Cursor CharList = OffWith String
274 cursorOf qinput = [||
275 let CharList input = $$qinput
276 next (OffWith i (c:cs)) = (# c, OffWith (i+1) cs #)
277 size = List.length input
278 more (OffWith i _) = i < size
279 --more (OffWith _ []) = False
280 --more _ = True
281 in (# $$offWith input, more, next #)
282 ||]
283 instance Inputable Stream where
284 type Cursor Stream = OffWith Stream
285 cursorOf qinput = [||
286 let next (OffWith o (c :> cs)) = (# c, OffWith (o + 1) cs #)
287 in (# $$offWith $$qinput, const True, next #)
288 ||]
289
290 offWith :: CodeQ (ts -> OffWith ts)
291 offWith = [|| OffWith 0 ||]
292 -}
293 {-
294 -- type instance Cursor CacheText = (Text, Stream)
295 -- type instance Cursor BSL.ByteString = OffWith BSL.ByteString
296 -}
297